kernel -- virtio: Implement virtio-layer lwkt_serializer for devices
[dragonfly.git] sys/dev/virtual/virtio/block/virtio_blk.c
/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"
struct vtblk_request {
        struct virtio_blk_outhdr        vbr_hdr;
        struct bio                      *vbr_bp;
        uint8_t                         vbr_ack;

        TAILQ_ENTRY(vtblk_request)      vbr_link;
};

struct vtblk_softc {
        device_t                        vtblk_dev;
        struct lwkt_serialize           vtblk_slz;
        uint64_t                        vtblk_features;

#define VTBLK_FLAG_READONLY             0x0002
#define VTBLK_FLAG_DETACH               0x0004
#define VTBLK_FLAG_SUSPEND              0x0008
        uint32_t                        vtblk_flags;

        struct virtqueue                *vtblk_vq;
        struct sglist                   *vtblk_sglist;
        struct disk                     vtblk_disk;
        cdev_t                          cdev;
        struct devstat                  stats;

        struct bio_queue_head           vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)     vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)     vtblk_req_ready;

        int                             vtblk_sector_size;
        int                             vtblk_max_nsegs;
        int                             vtblk_unit;
        int                             vtblk_request_count;

        struct vtblk_request            vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);

static void     vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                                       struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                                 struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t         vtblk_open;
static d_strategy_t     vtblk_strategy;
static d_dump_t         vtblk_dump;

static struct dev_ops vbd_disk_ops = {
        { "vbd", 200, D_DISK | D_MPSAFE },
        .d_open         = vtblk_open,
        .d_close        = nullclose,
        .d_read         = physread,
        .d_write        = physwrite,
        .d_strategy     = vtblk_strategy,
        .d_dump         = vtblk_dump,
};

static void vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int vtblk_execute_request(struct vtblk_softc *, struct vtblk_request *);

static int              vtblk_vq_intr(void *);
static void             vtblk_complete(void *);

static void             vtblk_stop(struct vtblk_softc *);

static void             vtblk_drain_vq(struct vtblk_softc *, int);
static void             vtblk_drain(struct vtblk_softc *);

static int              vtblk_alloc_requests(struct vtblk_softc *);
static void             vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void             vtblk_enqueue_request(struct vtblk_softc *,
                                              struct vtblk_request *);

static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
static void             vtblk_enqueue_ready(struct vtblk_softc *,
                                            struct vtblk_request *);

static void             vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER               | \
     VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        { 0, 0 }
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
              vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
                break;
        case MOD_UNLOAD:
                break;
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{
        if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
                return (ENXIO);

        device_set_desc(dev, "VirtIO Block Adapter");

        return (BUS_PROBE_DEFAULT);
}

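/*
 * Attach: negotiate features, validate the host's segment limits,
 * allocate the working sglist, virtqueue, and request pool, create
 * the disk, and only then wire the virtqueue interrupt up to the
 * softc serializer.
 */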
static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;
        sc->vtblk_unit = device_get_unit(dev);

        lwkt_serialize_init(&sc->vtblk_slz);

        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        virtio_set_feature_desc(dev, vtblk_feature_desc);
        vtblk_negotiate_features(sc);

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                sc->vtblk_flags |= VTBLK_FLAG_READONLY;

        /* Get local copy of config. */
        virtio_read_device_config(dev, 0, &blkcfg,
                                  sizeof(struct virtio_blk_config));

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < MAXPHYS) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
                goto fail;
        }

        /*
         * Allocate working sglist. The number of segments may be too
         * large to safely store on the stack.
         */
        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_alloc_requests(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        error = virtio_setup_intr(dev, &sc->vtblk_slz);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        lwkt_serialize_enter(&sc->vtblk_slz);
        sc->vtblk_flags |= VTBLK_FLAG_DETACH;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        lwkt_serialize_exit(&sc->vtblk_slz);

        vtblk_drain(sc);

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        lwkt_serialize_enter(&sc->vtblk_slz);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
        /* TODO Wait for any inflight IO to complete? */
        lwkt_serialize_exit(&sc->vtblk_slz);

        return (0);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        lwkt_serialize_enter(&sc->vtblk_slz);
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        /* TODO Resume IO? */
        lwkt_serialize_exit(&sc->vtblk_slz);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{
        return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
        struct vtblk_softc *sc;
        cdev_t dev = ap->a_head.a_dev;

        sc = dev->si_drv1;
        if (sc == NULL)
                return (ENXIO);

        return ((sc->vtblk_flags & VTBLK_FLAG_DETACH) ? ENXIO : 0);
}

static int
vtblk_dump(struct dev_dump_args *ap)
{
        /* XXX: Crash dumps are not yet implemented for virtio disks. */
        return (ENXIO);
}

static int
vtblk_strategy(struct dev_strategy_args *ap)
{
        struct vtblk_softc *sc;
        cdev_t dev = ap->a_head.a_dev;
        struct bio *bio = ap->a_bio;
        struct buf *bp = bio->bio_buf;

        sc = dev->si_drv1;
        if (sc == NULL) {
                vtblk_bio_error(bio, EINVAL);
                return (EINVAL);
        }

        /*
         * Fail any write or flush if the device is read-only. There
         * does not seem to be a better way to report our read-only
         * status further up the stack.
         *
         * XXX: FreeBSD reports this through GEOM; is there a DragonFly
         * equivalent?
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) &&
            (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
                vtblk_bio_error(bio, EROFS);
                return (EINVAL);
        }

        lwkt_serialize_enter(&sc->vtblk_slz);
        if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
                devstat_start_transaction(&sc->stats);
                bioqdisksort(&sc->vtblk_bioq, bio);
                vtblk_startio(sc);
        } else {
                vtblk_bio_error(bio, ENXIO);
        }
        lwkt_serialize_exit(&sc->vtblk_slz);
        return (0);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = VTBLK_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
}

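/*
 * Size requests from the host's advertised limits: VTBLK_MIN_SEGMENTS
 * covers the header and ack descriptors, plus either the negotiated
 * data segment count (capped at a MAXPHYS-sized transfer) or a single
 * data segment when SEG_MAX was not negotiated.
 */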
static int
vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
        } else {
                nsegs += 1;
        }

        return (nsegs);
}

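/*
 * The block device uses a single virtqueue for all requests;
 * completions are signalled through vtblk_vq_intr.
 */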
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
                           vtblk_vq_intr, sc, &sc->vtblk_vq,
                           "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

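/*
 * Publish the disk: take the sector size and geometry from the device
 * config space, register a devstat entry, and create the "vbd" disk
 * device.
 */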
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        struct disk_info info;

        /* Construct the disk_info. */
        bzero(&info, sizeof(info));

        if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
                sc->vtblk_sector_size = blkcfg->blk_size;
        else
                sc->vtblk_sector_size = DEV_BSIZE;

        info.d_media_blksize = sc->vtblk_sector_size;
        info.d_media_blocks = blkcfg->capacity;

        info.d_ncylinders = blkcfg->geometry.cylinders;
        info.d_nheads = blkcfg->geometry.heads;
        info.d_secpertrack = blkcfg->geometry.sectors;

        info.d_secpercyl = info.d_secpertrack * info.d_nheads;

        devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
                          DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
                          DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
                          DEVSTAT_PRIORITY_DISK);

        /* Attach a generic disk device to ourselves. */
        sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
                               &vbd_disk_ops);

        sc->cdev->si_drv1 = sc;
        disk_setdiskinfo(&sc->vtblk_disk, &info);
}

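/*
 * Push queued bios into the virtqueue until it fills or the bio queue
 * empties. A request that fails to enqueue is parked on the ready
 * queue and retried after the next completion frees descriptors.
 */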
static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        vq = sc->vtblk_vq;
        enq = 0;

        ASSERT_SERIALIZED(&sc->vtblk_slz);

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
                return;

        while (!virtqueue_full(vq)) {
                if ((req = vtblk_dequeue_ready(sc)) == NULL)
                        req = vtblk_bio_request(sc);
                if (req == NULL)
                        break;

                if (vtblk_execute_request(sc, req) != 0) {
                        vtblk_enqueue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq, &sc->vtblk_slz);
}

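/*
 * Turn the next queued bio into a block request header. The starting
 * sector is always expressed in 512-byte (DEV_BSIZE) units, independent
 * of the device's advertised block size.
 */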
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bio;
        struct buf *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return (NULL);

        bio = bioq_takefirst(bioq);
        req->vbr_bp = bio;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = 1;
        bp = bio->bio_buf;

        switch (bp->b_cmd) {
        case BUF_CMD_FLUSH:
                req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
                break;
        case BUF_CMD_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
                req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
                break;
        case BUF_CMD_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
                req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
                break;
        default:
                KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
                req->vbr_hdr.type = -1;
                break;
        }

        if (bp->b_flags & B_ORDERED)
                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

        return (req);
}

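/*
 * Map a request into the shared sglist and enqueue it. The layout is
 * always: header (device-readable), optional data buffer (device-
 * writable for reads), then the single device-writable ack byte.
 */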
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct sglist *sg;
        struct bio *bio;
        struct buf *bp;
        int writable, error;

        sg = sc->vtblk_sglist;
        bio = req->vbr_bp;
        bp = bio->bio_buf;
        writable = 0;

        /*
         * The sglist is live throughout this subroutine.
         */
        sglist_reset(sg);

        error = sglist_append(sg, &req->vbr_hdr,
                              sizeof(struct virtio_blk_outhdr));
        KASSERT(error == 0, ("error adding header to sglist"));
        KASSERT(sg->sg_nseg == 1,
            ("header spanned multiple segments: %d", sg->sg_nseg));

        if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
                error = sglist_append(sg, bp->b_data, bp->b_bcount);
                KASSERT(error == 0, ("error adding buffer to sglist"));

                /* BUF_CMD_READ means the host writes into our buffer. */
                if (bp->b_cmd == BUF_CMD_READ)
                        writable += sg->sg_nseg - 1;
        }

        error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        KASSERT(error == 0, ("error adding ack to sglist"));
        writable++;

        KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
            ("fewer than min segments: %d", sg->sg_nseg));

        error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
                                  sg->sg_nseg - writable, writable);

        sglist_reset(sg);

        return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
        vtblk_complete(xsc);

        return (1);
}

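/*
 * Completion processing, entered from the virtqueue interrupt with the
 * serializer held. Virtqueue interrupts stay disabled while the ring is
 * drained; the enable/retry dance at the bottom closes the race with
 * completions that arrive just as interrupts are re-enabled.
 */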
static void
vtblk_complete(void *arg)
{
        struct vtblk_softc *sc;
        struct vtblk_request *req;
        struct virtqueue *vq;
        struct bio *bio;
        struct buf *bp;

        sc = arg;
        vq = sc->vtblk_vq;

        lwkt_serialize_handler_disable(&sc->vtblk_slz);
        virtqueue_disable_intr(sc->vtblk_vq);
        ASSERT_SERIALIZED(&sc->vtblk_slz);

retry:
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
                return;

        while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
                bio = req->vbr_bp;
                bp = bio->bio_buf;

                if (req->vbr_ack == VIRTIO_BLK_S_OK) {
                        bp->b_resid = 0;
                } else {
                        bp->b_flags |= B_ERROR;
                        if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
                                bp->b_error = ENOTSUP;
                        else
                                bp->b_error = EIO;
                }

                devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

                lwkt_serialize_exit(&sc->vtblk_slz);
                /*
                 * Dropping the serializer around biodone() cannot let
                 * another completion run here: the virtqueue interrupt
                 * was disabled above when this handler started. It does,
                 * however, allow concurrent vtblk_strategy()/
                 * vtblk_startio() dispatches to proceed.
                 */
                biodone(bio);
                lwkt_serialize_enter(&sc->vtblk_slz);

                vtblk_enqueue_request(sc, req);
        }

        vtblk_startio(sc);

        if (virtqueue_enable_intr(vq) != 0) {
                /*
                 * New virtqueue entries appeared immediately after
                 * interrupts were re-enabled; disable them again and
                 * drain the ring once more.
                 */
                virtqueue_disable_intr(vq);
                goto retry;
        }
        lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{
        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                if (!skip_done)
                        vtblk_bio_error(req->vbr_bp, ENXIO);

                vtblk_enqueue_request(sc, req);
        }

        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

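/*
 * Fail every in-flight and queued bio with ENXIO and release the
 * preallocated requests; called on detach after the device has been
 * stopped.
 */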
static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bio;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL)
                vtblk_drain_vq(sc, 0);

        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
                vtblk_bio_error(req->vbr_bp, ENXIO);
                vtblk_enqueue_request(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bio = bioq_takefirst(bioq);
                vtblk_bio_error(bio, ENXIO);
        }

        vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, nreqs;

        nreqs = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
         * the number allocated when indirect descriptors are not available.
         */
        nreqs /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < nreqs; i++) {
                req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);

                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }

        return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
                kfree(req, M_DEVBUF);
        }

        KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        bzero(req, sizeof(struct vtblk_request));
        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{
        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{
        struct buf *bio_buf = bp->bio_buf;

        /* Mark the buf as failed before completing the bio. */
        bio_buf->b_flags |= B_ERROR;
        bio_buf->b_error = error;
        biodone(bp);
}
862 }