From 11447b5977451cf8571f2d8f5f6b8e06a960584c Mon Sep 17 00:00:00 2001 From: Venkatesh Srinivas Date: Mon, 24 Dec 2012 13:41:27 -0500 Subject: [PATCH] kernel -- Import virtio & virtio-block drivers. virtio-blk provides a paravirtualized storage controller, with one disk per virtio device. This driver is based on Tim Bisson's port of FreeBSD's virtio and virtio-blk devices. Differences from Tim's port: * Import all FreeBSD updates from 4/16 on. * Remove indirect descriptor support from virtio device. * Mark devices as D_MPSAFE; removes mplock around disk routines, they are all correctly self-synchronized. * Implement devstat support. * Move I/O completion routine to threaded taskqueue. * Do not hold target spinlock around virtqueue notify. * Move objcache caches to kmalloc. --- sys/dev/virtual/Makefile | 2 +- sys/dev/virtual/virtio/Makefile | 28 + sys/dev/virtual/virtio/block/Makefile | 39 + sys/dev/virtual/virtio/block/virtio_blk.c | 1030 ++++++++++++++++ sys/dev/virtual/virtio/block/virtio_blk.h | 119 ++ sys/dev/virtual/virtio/pci/Makefile | 40 + sys/dev/virtual/virtio/pci/virtio_bus_if.h | 123 ++ sys/dev/virtual/virtio/pci/virtio_if.h | 25 + sys/dev/virtual/virtio/pci/virtio_pci.c | 1092 +++++++++++++++++ sys/dev/virtual/virtio/pci/virtio_pci.h | 87 ++ sys/dev/virtual/virtio/virtio/Makefile | 10 + sys/dev/virtual/virtio/virtio/virtio.c | 281 +++++ sys/dev/virtual/virtio/virtio/virtio.h | 141 +++ sys/dev/virtual/virtio/virtio/virtio_bus_if.m | 90 ++ sys/dev/virtual/virtio/virtio/virtio_if.m | 43 + sys/dev/virtual/virtio/virtio/virtio_ring.h | 164 +++ sys/dev/virtual/virtio/virtio/virtqueue.c | 639 ++++++++++ sys/dev/virtual/virtio/virtio/virtqueue.h | 95 ++ 18 files changed, 4047 insertions(+), 1 deletion(-) create mode 100644 sys/dev/virtual/virtio/Makefile create mode 100644 sys/dev/virtual/virtio/block/Makefile create mode 100644 sys/dev/virtual/virtio/block/virtio_blk.c create mode 100644 sys/dev/virtual/virtio/block/virtio_blk.h create mode 100644 
sys/dev/virtual/virtio/pci/Makefile create mode 100644 sys/dev/virtual/virtio/pci/virtio_bus_if.h create mode 100644 sys/dev/virtual/virtio/pci/virtio_if.h create mode 100644 sys/dev/virtual/virtio/pci/virtio_pci.c create mode 100644 sys/dev/virtual/virtio/pci/virtio_pci.h create mode 100644 sys/dev/virtual/virtio/virtio/Makefile create mode 100644 sys/dev/virtual/virtio/virtio/virtio.c create mode 100644 sys/dev/virtual/virtio/virtio/virtio.h create mode 100644 sys/dev/virtual/virtio/virtio/virtio_bus_if.m create mode 100644 sys/dev/virtual/virtio/virtio/virtio_if.m create mode 100644 sys/dev/virtual/virtio/virtio/virtio_ring.h create mode 100644 sys/dev/virtual/virtio/virtio/virtqueue.c create mode 100644 sys/dev/virtual/virtio/virtio/virtqueue.h diff --git a/sys/dev/virtual/Makefile b/sys/dev/virtual/Makefile index 3514db7aea..f1a4af811e 100644 --- a/sys/dev/virtual/Makefile +++ b/sys/dev/virtual/Makefile @@ -1,6 +1,6 @@ .include "${.CURDIR}/../../platform/${MACHINE_PLATFORM}/Makefile.inc" -SUBDIR= vkernel +SUBDIR= vkernel virtio .for dir in ${SUBDIR} .if empty(DEV_SUPPORT:Mvirtual) && \ diff --git a/sys/dev/virtual/virtio/Makefile b/sys/dev/virtual/virtio/Makefile new file mode 100644 index 0000000000..b83a94bb9c --- /dev/null +++ b/sys/dev/virtual/virtio/Makefile @@ -0,0 +1,28 @@ +# +# $FreeBSD$ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +SUBDIR= virtio pci block + +.include diff --git a/sys/dev/virtual/virtio/block/Makefile b/sys/dev/virtual/virtio/block/Makefile new file mode 100644 index 0000000000..f25b6d79b5 --- /dev/null +++ b/sys/dev/virtual/virtio/block/Makefile @@ -0,0 +1,39 @@ +# +# $FreeBSD$ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +.PATH: ${.CURDIR} + +KMOD= virtio_blk +SRCS= virtio_blk.c +SRCS+= virtio_bus_if.h virtio_if.h +SRCS+= bus_if.h device_if.h + +CFLAGS+= -I${.CURDIR}/.. + +MFILES= kern/bus_if.m kern/device_if.m \ + dev/virtual/virtio/virtio/virtio_bus_if.m \ + dev/virtual/virtio/virtio/virtio_if.m + +.include diff --git a/sys/dev/virtual/virtio/block/virtio_blk.c b/sys/dev/virtual/virtio/block/virtio_blk.c new file mode 100644 index 0000000000..d172b843b9 --- /dev/null +++ b/sys/dev/virtual/virtio/block/virtio_blk.c @@ -0,0 +1,1030 @@ +/*- + * Copyright (c) 2011, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $ + */ + +/* Driver for VirtIO block devices. */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "virtio_blk.h" + +struct vtblk_request { + struct virtio_blk_outhdr vbr_hdr; + struct bio *vbr_bp; + uint8_t vbr_ack; + + TAILQ_ENTRY(vtblk_request) vbr_link; +}; + +struct vtblk_softc { + device_t vtblk_dev; + struct spinlock vtblk_mtx; + uint64_t vtblk_features; + +#define VTBLK_FLAG_READONLY 0x0002 +#define VTBLK_FLAG_DETACH 0x0004 +#define VTBLK_FLAG_SUSPEND 0x0008 +#define VTBLK_FLAG_DUMPING 0x0010 + uint32_t vtblk_flags; + + struct virtqueue *vtblk_vq; + struct sglist *vtblk_sglist; + struct disk vtblk_disk; + cdev_t cdev; + struct devstat stats; + + struct bio_queue_head vtblk_bioq; + TAILQ_HEAD(, vtblk_request) vtblk_req_free; + TAILQ_HEAD(, vtblk_request) vtblk_req_ready; + + struct task vtblk_intr_task; + + int vtblk_sector_size; + int vtblk_max_nsegs; + int vtblk_unit; + int vtblk_request_count; + + struct vtblk_request vtblk_dump_request; +}; + +static struct virtio_feature_desc vtblk_feature_desc[] = { + { VIRTIO_BLK_F_BARRIER, "HostBarrier" }, + { VIRTIO_BLK_F_SIZE_MAX, "MaxSegSize" }, + { VIRTIO_BLK_F_SEG_MAX, "MaxNumSegs" }, + { VIRTIO_BLK_F_GEOMETRY, "DiskGeometry" }, + { VIRTIO_BLK_F_RO, 
"ReadOnly" }, + { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" }, + { VIRTIO_BLK_F_SCSI, "SCSICmds" }, + { VIRTIO_BLK_F_FLUSH, "FlushCmd" }, + { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, + + { 0, NULL } +}; + +static int vtblk_modevent(module_t, int, void *); + +static int vtblk_probe(device_t); +static int vtblk_attach(device_t); +static int vtblk_detach(device_t); +static int vtblk_suspend(device_t); +static int vtblk_resume(device_t); +static int vtblk_shutdown(device_t); + +static void vtblk_negotiate_features(struct vtblk_softc *); +static int vtblk_maximum_segments(struct vtblk_softc *, + struct virtio_blk_config *); +static int vtblk_alloc_virtqueue(struct vtblk_softc *); +static void vtblk_alloc_disk(struct vtblk_softc *, + struct virtio_blk_config *); +/* + * Interface to the device switch. + */ +static d_open_t vtblk_open; +static d_strategy_t vtblk_strategy; +static d_dump_t vtblk_dump; + +static struct dev_ops vbd_disk_ops = { + { "vbd", 200, D_DISK | D_MPSAFE }, + .d_open = vtblk_open, + .d_close = nullclose, + .d_read = physread, + .d_write = physwrite, + .d_strategy = vtblk_strategy, + .d_dump = vtblk_dump, +}; + +static void vtblk_startio(struct vtblk_softc *); +static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *); +static int vtblk_execute_request(struct vtblk_softc *, + struct vtblk_request *); + +static int vtblk_vq_intr(void *); +static void vtblk_intr_task(void *, int); + +static void vtblk_stop(struct vtblk_softc *); + +static void vtblk_prepare_dump(struct vtblk_softc *); +static int vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t); +static int vtblk_flush_dump(struct vtblk_softc *); +static int vtblk_poll_request(struct vtblk_softc *, + struct vtblk_request *); + +static void vtblk_drain_vq(struct vtblk_softc *, int); +static void vtblk_drain(struct vtblk_softc *); + +static int vtblk_alloc_requests(struct vtblk_softc *); +static void vtblk_free_requests(struct vtblk_softc *); +static struct vtblk_request 
*vtblk_dequeue_request(struct vtblk_softc *); +static void vtblk_enqueue_request(struct vtblk_softc *, + struct vtblk_request *); + +static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *); +static void vtblk_enqueue_ready(struct vtblk_softc *, + struct vtblk_request *); + +static void vtblk_bio_error(struct bio *, int); + +/* Tunables. */ +static int vtblk_no_ident = 0; +TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident); + +/* Features desired/implemented by this driver. */ +#define VTBLK_FEATURES \ + (VIRTIO_BLK_F_BARRIER | \ + VIRTIO_BLK_F_SIZE_MAX | \ + VIRTIO_BLK_F_SEG_MAX | \ + VIRTIO_BLK_F_GEOMETRY | \ + VIRTIO_BLK_F_RO | \ + VIRTIO_BLK_F_BLK_SIZE | \ + VIRTIO_BLK_F_FLUSH) + +#define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx +#define VTBLK_LOCK_INIT(_sc) spin_init(&(_sc)->vtblk_mtx) +#define VTBLK_LOCK(_sc) spin_lock(VTBLK_MTX((_sc))) +#define VTBLK_TRYLOCK(_sc) spin_trylock(VTBLK_MTX((_sc))) +#define VTBLK_UNLOCK(_sc) spin_unlock(VTBLK_MTX((_sc))) +#define VTBLK_LOCK_DESTROY(_sc) spin_uninit(VTBLK_MTX((_sc))) + +#define VTBLK_LOCK_ASSERT(_sc) +#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) + +/* + * Each block request uses at least two segments - one for the header + * and one for the status. + */ +#define VTBLK_MIN_SEGMENTS 2 + +static device_method_t vtblk_methods[] = { + /* Device methods. 
*/ + DEVMETHOD(device_probe, vtblk_probe), + DEVMETHOD(device_attach, vtblk_attach), + DEVMETHOD(device_detach, vtblk_detach), + DEVMETHOD(device_suspend, vtblk_suspend), + DEVMETHOD(device_resume, vtblk_resume), + DEVMETHOD(device_shutdown, vtblk_shutdown), + + { 0, 0 } +}; + +static driver_t vtblk_driver = { + "vtblk", + vtblk_methods, + sizeof(struct vtblk_softc) +}; +static devclass_t vtblk_devclass; + +DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, + vtblk_modevent, NULL); +MODULE_VERSION(virtio_blk, 1); +MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1); + +static int +vtblk_modevent(module_t mod, int type, void *unused) +{ + int error; + + error = 0; + + switch (type) { + case MOD_LOAD: + break; + case MOD_UNLOAD: + break; + case MOD_SHUTDOWN: + break; + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +static int +vtblk_probe(device_t dev) +{ + + if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK) + return (ENXIO); + + device_set_desc(dev, "VirtIO Block Adapter"); + + return (BUS_PROBE_DEFAULT); +} + +static int +vtblk_attach(device_t dev) +{ + struct vtblk_softc *sc; + struct virtio_blk_config blkcfg; + int error; + + sc = device_get_softc(dev); + sc->vtblk_dev = dev; + sc->vtblk_unit = device_get_unit(dev); + + VTBLK_LOCK_INIT(sc); + + bioq_init(&sc->vtblk_bioq); + TAILQ_INIT(&sc->vtblk_req_free); + TAILQ_INIT(&sc->vtblk_req_ready); + + virtio_set_feature_desc(dev, vtblk_feature_desc); + vtblk_negotiate_features(sc); + + if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) + sc->vtblk_flags |= VTBLK_FLAG_READONLY; + + /* Get local copy of config. */ + virtio_read_device_config(dev, 0, &blkcfg, + sizeof(struct virtio_blk_config)); + + /* + * With the current sglist(9) implementation, it is not easy + * for us to support a maximum segment size as adjacent + * segments are coalesced. For now, just make sure it's larger + * than the maximum supported transfer size. 
+ */ + if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) { + if (blkcfg.size_max < MAXPHYS) { + error = ENOTSUP; + device_printf(dev, "host requires unsupported " + "maximum segment size feature\n"); + goto fail; + } + } + + sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); + if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { + error = EINVAL; + device_printf(dev, "fewer than minimum number of segments " + "allowed: %d\n", sc->vtblk_max_nsegs); + goto fail; + } + + /* + * Allocate working sglist. The number of segments may be too + * large to safely store on the stack. + */ + sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT); + if (sc->vtblk_sglist == NULL) { + error = ENOMEM; + device_printf(dev, "cannot allocate sglist\n"); + goto fail; + } + + error = vtblk_alloc_virtqueue(sc); + if (error) { + device_printf(dev, "cannot allocate virtqueue\n"); + goto fail; + } + + error = vtblk_alloc_requests(sc); + if (error) { + device_printf(dev, "cannot preallocate requests\n"); + goto fail; + } + + vtblk_alloc_disk(sc, &blkcfg); + + TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc); + + error = virtio_setup_intr(dev); + if (error) { + device_printf(dev, "cannot setup virtqueue interrupt\n"); + goto fail; + } + + virtqueue_enable_intr(sc->vtblk_vq); + +fail: + if (error) + vtblk_detach(dev); + + return (error); +} + +static int +vtblk_detach(device_t dev) +{ + struct vtblk_softc *sc; + + sc = device_get_softc(dev); + + VTBLK_LOCK(sc); + sc->vtblk_flags |= VTBLK_FLAG_DETACH; + if (device_is_attached(dev)) + vtblk_stop(sc); + VTBLK_UNLOCK(sc); + + taskqueue_drain(taskqueue_thread[mycpuid], &sc->vtblk_intr_task); + + vtblk_drain(sc); + + if (sc->vtblk_sglist != NULL) { + sglist_free(sc->vtblk_sglist); + sc->vtblk_sglist = NULL; + } + + VTBLK_LOCK_DESTROY(sc); + + return (0); +} + +static int +vtblk_suspend(device_t dev) +{ + struct vtblk_softc *sc; + + sc = device_get_softc(dev); + + VTBLK_LOCK(sc); + sc->vtblk_flags |= VTBLK_FLAG_SUSPEND; + /* 
TODO Wait for any inflight IO to complete? */ + VTBLK_UNLOCK(sc); + + return (0); +} + +static int +vtblk_resume(device_t dev) +{ + struct vtblk_softc *sc; + + sc = device_get_softc(dev); + + VTBLK_LOCK(sc); + sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND; + /* TODO Resume IO? */ + VTBLK_UNLOCK(sc); + + return (0); +} + +static int +vtblk_shutdown(device_t dev) +{ + return (0); +} + +static int +vtblk_open(struct dev_open_args *ap) +{ + struct vtblk_softc *sc; + cdev_t dev = ap->a_head.a_dev; + sc = dev->si_drv1; + if (sc == NULL) + return (ENXIO); + + return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0); +} + +static int +vtblk_dump(struct dev_dump_args *ap) +{ + struct vtblk_softc *sc; + int error; + + error = 0; + + cdev_t dev = ap->a_head.a_dev; + sc = dev->si_drv1; + + if (sc == NULL) + return (ENXIO); + + if (VTBLK_TRYLOCK(sc) == 0) { + device_printf(sc->vtblk_dev, + "softc already locked, cannot dump...\n"); + return (EBUSY); + } + + if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) { + vtblk_prepare_dump(sc); + sc->vtblk_flags |= VTBLK_FLAG_DUMPING; + } + + if (ap->a_length > 0) { + error = vtblk_write_dump(sc, ap->a_virtual, ap->a_offset, + ap->a_length); + } else if (ap->a_virtual == NULL && ap->a_offset == 0) { + error = vtblk_flush_dump(sc); + } + + VTBLK_UNLOCK(sc); + + return (error); +} + +static int +vtblk_strategy(struct dev_strategy_args *ap) +{ + struct vtblk_softc *sc; + cdev_t dev = ap->a_head.a_dev; + sc = dev->si_drv1; + struct bio *bio = ap->a_bio; + struct buf *bp = bio->bio_buf; + + if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) { + KKASSERT(bp->b_count > 0); + } + + if (sc == NULL) { + vtblk_bio_error(bio, EINVAL); + return EINVAL; + } + + /* + * Fail any write if RO. Unfortunately, there does not seem to + * be a better way to report our readonly'ness to GEOM above. + * + * XXX: Is that true in DFly? 
+	 */
+	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
+	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
+		/*
+		 * Only writes and cache flushes are invalid on a read-only
+		 * device; reads must still be allowed through.
+		 */
+		vtblk_bio_error(bio, EROFS);
+		return (EINVAL);
+	}
+
+	VTBLK_LOCK(sc);
+	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
+		devstat_start_transaction(&sc->stats);
+		bioqdisksort(&sc->vtblk_bioq, bio);
+		vtblk_startio(sc);
+	} else {
+		vtblk_bio_error(bio, ENXIO);
+	}
+	VTBLK_UNLOCK(sc);
+	return 0;
+}
+
+/*
+ * Negotiate the feature set with the host; the accepted features are
+ * recorded in the softc for use by virtio_reinit() during dumps.
+ */
+static void
+vtblk_negotiate_features(struct vtblk_softc *sc)
+{
+	device_t dev;
+	uint64_t features;
+
+	dev = sc->vtblk_dev;
+	features = VTBLK_FEATURES;
+
+	sc->vtblk_features = virtio_negotiate_features(dev, features);
+}
+
+/*
+ * Compute the number of virtqueue descriptors needed per request:
+ * VTBLK_MIN_SEGMENTS (header + status) plus the data segments, capped
+ * by the host's seg_max when advertised.
+ */
+static int
+vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
+{
+	device_t dev;
+	int nsegs;
+
+	dev = sc->vtblk_dev;
+	nsegs = VTBLK_MIN_SEGMENTS;
+
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
+		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
+	} else {
+		nsegs += 1;
+	}
+
+	return (nsegs);
+}
+
+/* Allocate the single request virtqueue used by this device. */
+static int
+vtblk_alloc_virtqueue(struct vtblk_softc *sc)
+{
+	device_t dev;
+	struct vq_alloc_info vq_info;
+
+	dev = sc->vtblk_dev;
+
+	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
+	    vtblk_vq_intr, sc, &sc->vtblk_vq,
+	    "%s request", device_get_nameunit(dev));
+
+	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
+}
+
+static void
+vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
+{
+
+	struct disk_info info;
+
+	/* construct the disk_info */
+	bzero(&info, sizeof(info));
+
+	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
+		sc->vtblk_sector_size = blkcfg->blk_size;
+	else
+		sc->vtblk_sector_size = DEV_BSIZE;
+
+	info.d_media_blksize = sc->vtblk_sector_size;
+	info.d_media_blocks = blkcfg->capacity;
+
+	info.d_ncylinders = blkcfg->geometry.cylinders;
+	info.d_nheads = blkcfg->geometry.heads;
+	info.d_secpertrack = blkcfg->geometry.sectors;
+
+	info.d_secpercyl = info.d_secpertrack * info.d_nheads;
+
devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev), + DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, + DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, + DEVSTAT_PRIORITY_DISK); + + /* attach a generic disk device to ourselves */ + sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk, + &vbd_disk_ops); + + sc->cdev->si_drv1 = sc; + disk_setdiskinfo(&sc->vtblk_disk, &info); +} + +static void +vtblk_startio(struct vtblk_softc *sc) +{ + struct virtqueue *vq; + struct vtblk_request *req; + int enq; + + vq = sc->vtblk_vq; + enq = 0; + + VTBLK_LOCK_ASSERT(sc); + + if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND) + return; + + while (!virtqueue_full(vq)) { + if ((req = vtblk_dequeue_ready(sc)) == NULL) + req = vtblk_bio_request(sc); + if (req == NULL) + break; + + if (vtblk_execute_request(sc, req) != 0) { + vtblk_enqueue_ready(sc, req); + break; + } + + enq++; + } + + if (enq > 0) + virtqueue_notify(vq, &sc->vtblk_mtx); +} + +static struct vtblk_request * +vtblk_bio_request(struct vtblk_softc *sc) +{ + struct bio_queue_head *bioq; + struct vtblk_request *req; + struct bio *bio; + struct buf *bp; + + bioq = &sc->vtblk_bioq; + + if (bioq_first(bioq) == NULL) + return (NULL); + + req = vtblk_dequeue_request(sc); + if (req == NULL) + return (NULL); + + bio = bioq_takefirst(bioq); + req->vbr_bp = bio; + req->vbr_ack = -1; + req->vbr_hdr.ioprio = 1; + bp = bio->bio_buf; + + switch (bp->b_cmd) { + case BUF_CMD_FLUSH: + req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; + break; + case BUF_CMD_READ: + req->vbr_hdr.type = VIRTIO_BLK_T_IN; + req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; + break; + case BUF_CMD_WRITE: + req->vbr_hdr.type = VIRTIO_BLK_T_OUT; + req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; + break; + default: + KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd)); + req->vbr_hdr.type = -1; + break; + } + + if (bp->b_flags & B_ORDERED) + req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER; + + return (req); +} + +static int +vtblk_execute_request(struct vtblk_softc 
*sc, struct vtblk_request *req) +{ + struct sglist *sg; + struct bio *bio; + struct buf *bp; + int writable, error; + + sg = sc->vtblk_sglist; + bio = req->vbr_bp; + bp = bio->bio_buf; + writable = 0; + + /* + * sglist is live throughout this subroutine. + */ + sglist_reset(sg); + + error = sglist_append(sg, &req->vbr_hdr, + sizeof(struct virtio_blk_outhdr)); + KASSERT(error == 0, ("error adding header to sglist")); + KASSERT(sg->sg_nseg == 1, + ("header spanned multiple segments: %d", sg->sg_nseg)); + + if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) { + error = sglist_append(sg, bp->b_data, bp->b_bcount); + KASSERT(error == 0, ("error adding buffer to sglist")); + + /* BUF_CMD_READ means the host writes into our buffer. */ + if (bp->b_cmd == BUF_CMD_READ) + writable += sg->sg_nseg - 1; + } + + error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); + KASSERT(error == 0, ("error adding ack to sglist")); + writable++; + + KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS, + ("fewer than min segments: %d", sg->sg_nseg)); + + error = virtqueue_enqueue(sc->vtblk_vq, req, sg, + sg->sg_nseg - writable, writable); + + sglist_reset(sg); + + return (error); +} + +static int +vtblk_vq_intr(void *xsc) +{ + struct vtblk_softc *sc; + + sc = xsc; + + virtqueue_disable_intr(sc->vtblk_vq); + taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtblk_intr_task); + + return (1); +} + +static void +vtblk_intr_task(void *arg, int pending) +{ + struct vtblk_softc *sc; + struct vtblk_request *req; + struct virtqueue *vq; + struct bio *bio; + struct buf *bp; + + sc = arg; + vq = sc->vtblk_vq; + +retry: + VTBLK_LOCK(sc); + if (sc->vtblk_flags & VTBLK_FLAG_DETACH) { + VTBLK_UNLOCK(sc); + return; + } + + while ((req = virtqueue_dequeue(vq, NULL)) != NULL) { + bio = req->vbr_bp; + bp = bio->bio_buf; + + if (req->vbr_ack == VIRTIO_BLK_S_OK) + bp->b_resid = 0; + else { + bp->b_flags |= B_ERROR; + if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) { + bp->b_error = ENOTSUP; + } else { + bp->b_error 
= EIO; + } + } + + devstat_end_transaction_buf(&sc->stats, bio->bio_buf); + + VTBLK_UNLOCK(sc); + /* + * Unlocking the controller around biodone() does not allow + * processing further device interrupts; when we queued + * vtblk_intr_task, we disabled interrupts. It will allow + * concurrent vtblk_strategy/_startio command dispatches. + */ + biodone(bio); + VTBLK_LOCK(sc); + + vtblk_enqueue_request(sc, req); + } + + vtblk_startio(sc); + + if (virtqueue_enable_intr(vq) != 0) { + /* + * If new virtqueue entries appeared immediately after + * enabling interrupts, process them now. Release and + * retake softcontroller lock to try to avoid blocking + * I/O dispatch for too long. + */ + virtqueue_disable_intr(vq); + VTBLK_UNLOCK(sc); + goto retry; + } + + VTBLK_UNLOCK(sc); +} + +static void +vtblk_stop(struct vtblk_softc *sc) +{ + virtqueue_disable_intr(sc->vtblk_vq); + virtio_stop(sc->vtblk_dev); +} + +static void +vtblk_prepare_dump(struct vtblk_softc *sc) +{ + device_t dev; + struct virtqueue *vq; + + dev = sc->vtblk_dev; + vq = sc->vtblk_vq; + + vtblk_stop(sc); + + /* + * Drain all requests caught in-flight in the virtqueue, + * skipping biodone(). When dumping, only one request is + * outstanding at a time, and we just poll the virtqueue + * for the response. 
+	 */
+	vtblk_drain_vq(sc, 1);
+
+	if (virtio_reinit(dev, sc->vtblk_features) != 0)
+		panic("cannot reinit VirtIO block device during dump");
+
+	virtqueue_disable_intr(vq);
+	virtio_reinit_complete(dev);
+}
+
+/*
+ * Write one chunk of the kernel dump.  Uses an on-stack bio/buf pair
+ * and the preallocated dump request; the virtqueue is polled, so no
+ * interrupt or biodone() processing takes place.
+ */
+static int
+vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
+    size_t length)
+{
+	struct bio bio;
+	struct buf buf;
+	struct vtblk_request *req;
+
+	req = &sc->vtblk_dump_request;
+	req->vbr_ack = -1;
+	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
+	req->vbr_hdr.ioprio = 1;
+	req->vbr_hdr.sector = offset / DEV_BSIZE;
+
+	/*
+	 * Wire the on-stack buf into the on-stack bio; previously this
+	 * bzero'd an undeclared "buf" and dereferenced the uninitialized
+	 * bio.bio_buf pointer.
+	 */
+	bzero(&bio, sizeof(struct bio));
+	bzero(&buf, sizeof(struct buf));
+	bio.bio_buf = &buf;
+	req->vbr_bp = &bio;
+
+	buf.b_cmd = BUF_CMD_WRITE;
+	buf.b_data = virtual;
+	buf.b_bcount = length;
+
+	return (vtblk_poll_request(sc, req));
+}
+
+/*
+ * Issue a cache flush at the end of the dump, again with an on-stack
+ * bio/buf pair and polled completion.
+ */
+static int
+vtblk_flush_dump(struct vtblk_softc *sc)
+{
+	struct bio bio;
+	struct buf buf;
+	struct vtblk_request *req;
+
+	req = &sc->vtblk_dump_request;
+	req->vbr_ack = -1;
+	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
+	req->vbr_hdr.ioprio = 1;
+	req->vbr_hdr.sector = 0;
+
+	/* Same fix as vtblk_write_dump(): build a valid bio/buf pair. */
+	bzero(&bio, sizeof(struct bio));
+	bzero(&buf, sizeof(struct buf));
+	bio.bio_buf = &buf;
+	req->vbr_bp = &bio;
+
+	buf.b_cmd = BUF_CMD_FLUSH;
+
+	return (vtblk_poll_request(sc, req));
+}
+
+/*
+ * Submit a single request and spin until the host completes it.
+ * Only used while dumping, when exactly one request is outstanding.
+ */
+static int
+vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+	device_t dev;
+	struct virtqueue *vq;
+	struct vtblk_request *r;
+	int error;
+
+	dev = sc->vtblk_dev;
+	vq = sc->vtblk_vq;
+
+	if (!virtqueue_empty(vq))
+		return (EBUSY);
+
+	error = vtblk_execute_request(sc, req);
+	if (error)
+		return (error);
+
+	virtqueue_notify(vq, &sc->vtblk_mtx);
+
+	r = virtqueue_poll(vq, NULL);
+	KASSERT(r == req, ("unexpected request response"));
+
+	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
+		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ?
ENOTSUP : EIO; + if (bootverbose) + device_printf(dev, + "vtblk_poll_request: IO error: %d\n", error); + } + + return (error); +} + +static void +vtblk_drain_vq(struct vtblk_softc *sc, int skip_done) +{ + struct virtqueue *vq; + struct vtblk_request *req; + int last; + + vq = sc->vtblk_vq; + last = 0; + + while ((req = virtqueue_drain(vq, &last)) != NULL) { + if (!skip_done) + vtblk_bio_error(req->vbr_bp, ENXIO); + + vtblk_enqueue_request(sc, req); + } + + KASSERT(virtqueue_empty(vq), ("virtqueue not empty")); +} + +static void +vtblk_drain(struct vtblk_softc *sc) +{ + struct bio_queue_head *bioq; + struct vtblk_request *req; + struct bio *bp; + + bioq = &sc->vtblk_bioq; + + if (sc->vtblk_vq != NULL) + vtblk_drain_vq(sc, 0); + + while ((req = vtblk_dequeue_ready(sc)) != NULL) { + vtblk_bio_error(req->vbr_bp, ENXIO); + vtblk_enqueue_request(sc, req); + } + + while (bioq_first(bioq) != NULL) { + bp = bioq_takefirst(bioq); + vtblk_bio_error(bp, ENXIO); + } + + vtblk_free_requests(sc); +} + +static int +vtblk_alloc_requests(struct vtblk_softc *sc) +{ + struct vtblk_request *req; + int i, nreqs; + + nreqs = virtqueue_size(sc->vtblk_vq); + + /* + * Preallocate sufficient requests to keep the virtqueue full. Each + * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce + * the number allocated when indirect descriptors are not available. 
+	 */
+	nreqs /= VTBLK_MIN_SEGMENTS;
+
+	for (i = 0; i < nreqs; i++) {
+		req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);
+
+		sc->vtblk_request_count++;
+		vtblk_enqueue_request(sc, req);
+	}
+
+	return (0);
+}
+
+/* Free every preallocated request; all must be back on the free list. */
+static void
+vtblk_free_requests(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+
+	while ((req = vtblk_dequeue_request(sc)) != NULL) {
+		sc->vtblk_request_count--;
+		kfree(req, M_DEVBUF);
+	}
+
+	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
+}
+
+/* Pop a request from the free list, or NULL if exhausted. */
+static struct vtblk_request *
+vtblk_dequeue_request(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+
+	req = TAILQ_FIRST(&sc->vtblk_req_free);
+	if (req != NULL)
+		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
+
+	return (req);
+}
+
+/* Return a request to the free list, scrubbing any stale contents. */
+static void
+vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+	bzero(req, sizeof(struct vtblk_request));
+	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
+}
+
+/* Pop a request parked on the ready list (virtqueue was full). */
+static struct vtblk_request *
+vtblk_dequeue_ready(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+
+	req = TAILQ_FIRST(&sc->vtblk_req_ready);
+	if (req != NULL)
+		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
+
+	return (req);
+}
+
+/* Park a built-but-unsubmitted request for a later retry. */
+static void
+vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
+}
+
+/*
+ * Complete a bio with an error.  Previously the error argument was
+ * silently dropped and the bio completed as success; mark the buffer
+ * failed the same way the interrupt task does.
+ */
+static void
+vtblk_bio_error(struct bio *bp, int error)
+{
+	struct buf *obp = bp->bio_buf;
+
+	obp->b_flags |= B_ERROR;
+	obp->b_error = error;
+	biodone(bp);
+}
diff --git a/sys/dev/virtual/virtio/block/virtio_blk.h b/sys/dev/virtual/virtio/block/virtio_blk.h
new file mode 100644
index 0000000000..4e05e9e258
--- /dev/null
+++ b/sys/dev/virtual/virtio/block/virtio_blk.h
@@ -0,0 +1,119 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.h,v 1.2 2011/12/06 06:28:32 grehan Exp $ + */ + +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include + +/* Feature bits */ +#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? 
*/ +#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ +#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_FLUSH 0x0200 /* Cache flush command support */ +#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ + +#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ + +struct virtio_blk_config { + /* The capacity (in 512-byte sectors). */ + uint64_t capacity; + /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ + uint32_t size_max; + /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ + uint32_t seg_max; + /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */ + struct virtio_blk_geometry { + uint16_t cylinders; + uint8_t heads; + uint8_t sectors; + } geometry; + + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + uint32_t blk_size; +} __packed; + +/* + * Command types + * + * Usage is a bit tricky as some bits are used as flags and some are not. + * + * Rules: + * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or + * VIRTIO_BLK_T_BARRIER. VIRTIO_BLK_T_FLUSH is a command of its own + * and may not be combined with any of the other flags. + */ + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 + +/* This bit says it's a scsi command, not an actual read or write. */ +#define VIRTIO_BLK_T_SCSI_CMD 2 + +/* Cache flush command */ +#define VIRTIO_BLK_T_FLUSH 4 + +/* Get device ID command */ +#define VIRTIO_BLK_T_GET_ID 8 + +/* Barrier before this op. */ +#define VIRTIO_BLK_T_BARRIER 0x80000000 + +/* ID string length */ +#define VIRTIO_BLK_ID_BYTES 20 + +/* This is the first element of the read scatter-gather list. 
*/ +struct virtio_blk_outhdr { + /* VIRTIO_BLK_T* */ + uint32_t type; + /* io priority. */ + uint32_t ioprio; + /* Sector (ie. 512 byte offset) */ + uint64_t sector; +}; + +struct virtio_scsi_inhdr { + uint32_t errors; + uint32_t data_len; + uint32_t sense_len; + uint32_t residual; +}; + +/* And this is the final byte of the write scatter-gather list. */ +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 + +#endif /* _VIRTIO_BLK_H */ diff --git a/sys/dev/virtual/virtio/pci/Makefile b/sys/dev/virtual/virtio/pci/Makefile new file mode 100644 index 0000000000..5e7f21aa09 --- /dev/null +++ b/sys/dev/virtual/virtio/pci/Makefile @@ -0,0 +1,40 @@ +# +# $FreeBSD$ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# + +.PATH: ${.CURDIR} + + +CFLAGS+= -I${.CURDIR}/.. + +KMOD= virtio_pci +SRCS= virtio_pci.c +SRCS+= virtio_bus_if.h virtio_if.h +SRCS+= bus_if.h device_if.h pci_if.h + +MFILES= kern/bus_if.m kern/device_if.m bus/pci/pci_if.m +MFILES+=dev/virtual/virtio/virtio/virtio_bus_if.m +MFILES+=dev/virtual/virtio/virtio/virtio_if.m + +.include diff --git a/sys/dev/virtual/virtio/pci/virtio_bus_if.h b/sys/dev/virtual/virtio/pci/virtio_bus_if.h new file mode 100644 index 0000000000..987e77ee8b --- /dev/null +++ b/sys/dev/virtual/virtio/pci/virtio_bus_if.h @@ -0,0 +1,123 @@ +/* + * This file is produced automatically. + * Do not modify anything in here by hand. + * + * Created from source file + * @/dev/virtio/virtio_bus_if.m + * with + * makeobjops.awk + * + * See the source file for legal information + */ + +#ifndef _virtio_bus_if_h_ +#define _virtio_bus_if_h_ + + +struct vq_alloc_info; + +extern struct kobjop_desc virtio_bus_negotiate_features_desc; +typedef uint64_t virtio_bus_negotiate_features_t(device_t dev, + uint64_t child_features); +static __inline uint64_t VIRTIO_BUS_NEGOTIATE_FEATURES(device_t dev, + uint64_t child_features) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_negotiate_features); + return ((virtio_bus_negotiate_features_t *) _m)(dev, child_features); +} + +extern struct kobjop_desc virtio_bus_with_feature_desc; +typedef int virtio_bus_with_feature_t(device_t dev, uint64_t feature); +static __inline int VIRTIO_BUS_WITH_FEATURE(device_t dev, uint64_t feature) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_with_feature); + return ((virtio_bus_with_feature_t *) _m)(dev, feature); +} + +extern struct kobjop_desc virtio_bus_alloc_virtqueues_desc; +typedef int virtio_bus_alloc_virtqueues_t(device_t dev, int flags, int nvqs, + struct vq_alloc_info *info); +static __inline int VIRTIO_BUS_ALLOC_VIRTQUEUES(device_t dev, int flags, + int nvqs, + struct vq_alloc_info *info) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, 
virtio_bus_alloc_virtqueues); + return ((virtio_bus_alloc_virtqueues_t *) _m)(dev, flags, nvqs, info); +} + + +#define VIRTIO_ALLOC_VQS_DISABLE_MSIX 0x1 + +extern struct kobjop_desc virtio_bus_setup_intr_desc; +typedef int virtio_bus_setup_intr_t(device_t dev); +static __inline int VIRTIO_BUS_SETUP_INTR(device_t dev) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_setup_intr); + return ((virtio_bus_setup_intr_t *) _m)(dev); +} + +extern struct kobjop_desc virtio_bus_stop_desc; +typedef void virtio_bus_stop_t(device_t dev); +static __inline void VIRTIO_BUS_STOP(device_t dev) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_stop); + ((virtio_bus_stop_t *) _m)(dev); +} + +extern struct kobjop_desc virtio_bus_reinit_desc; +typedef int virtio_bus_reinit_t(device_t dev, uint64_t features); +static __inline int VIRTIO_BUS_REINIT(device_t dev, uint64_t features) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_reinit); + return ((virtio_bus_reinit_t *) _m)(dev, features); +} + +extern struct kobjop_desc virtio_bus_reinit_complete_desc; +typedef void virtio_bus_reinit_complete_t(device_t dev); +static __inline void VIRTIO_BUS_REINIT_COMPLETE(device_t dev) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_reinit_complete); + ((virtio_bus_reinit_complete_t *) _m)(dev); +} + +extern struct kobjop_desc virtio_bus_notify_vq_desc; +typedef void virtio_bus_notify_vq_t(device_t dev, uint16_t queue); +static __inline void VIRTIO_BUS_NOTIFY_VQ(device_t dev, uint16_t queue) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_notify_vq); + ((virtio_bus_notify_vq_t *) _m)(dev, queue); +} + +extern struct kobjop_desc virtio_bus_read_device_config_desc; +typedef void virtio_bus_read_device_config_t(device_t dev, bus_size_t offset, + void *dst, int len); +static __inline void VIRTIO_BUS_READ_DEVICE_CONFIG(device_t dev, + bus_size_t offset, void *dst, + int len) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, 
virtio_bus_read_device_config); + ((virtio_bus_read_device_config_t *) _m)(dev, offset, dst, len); +} + +extern struct kobjop_desc virtio_bus_write_device_config_desc; +typedef void virtio_bus_write_device_config_t(device_t dev, bus_size_t offset, + void *src, int len); +static __inline void VIRTIO_BUS_WRITE_DEVICE_CONFIG(device_t dev, + bus_size_t offset, + void *src, int len) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_bus_write_device_config); + ((virtio_bus_write_device_config_t *) _m)(dev, offset, src, len); +} + +#endif /* _virtio_bus_if_h_ */ diff --git a/sys/dev/virtual/virtio/pci/virtio_if.h b/sys/dev/virtual/virtio/pci/virtio_if.h new file mode 100644 index 0000000000..eb343a95dc --- /dev/null +++ b/sys/dev/virtual/virtio/pci/virtio_if.h @@ -0,0 +1,25 @@ +/* + * This file is produced automatically. + * Do not modify anything in here by hand. + * + * Created from source file + * @/dev/virtio/virtio_if.m + * with + * makeobjops.awk + * + * See the source file for legal information + */ + +#ifndef _virtio_if_h_ +#define _virtio_if_h_ + +extern struct kobjop_desc virtio_config_change_desc; +typedef int virtio_config_change_t(device_t dev); +static __inline int VIRTIO_CONFIG_CHANGE(device_t dev) +{ + kobjop_t _m; + KOBJOPLOOKUP(((kobj_t)dev)->ops, virtio_config_change); + return ((virtio_config_change_t *) _m)(dev); +} + +#endif /* _virtio_if_h_ */ diff --git a/sys/dev/virtual/virtio/pci/virtio_pci.c b/sys/dev/virtual/virtio/pci/virtio_pci.c new file mode 100644 index 0000000000..59baca0181 --- /dev/null +++ b/sys/dev/virtual/virtio/pci/virtio_pci.c @@ -0,0 +1,1092 @@ +/*- + * Copyright (c) 2011, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.c,v 1.3 2012/04/14 05:48:04 grehan Exp $ + */ + +/* Driver for the VirtIO PCI interface. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + + +#include +#include +#include "virtio_pci.h" + +#include "virtio_bus_if.h" +#include "virtio_if.h" + +struct vtpci_softc { + device_t vtpci_dev; + struct resource *vtpci_res; + struct resource *vtpci_msix_res; + uint64_t vtpci_features; + uint32_t vtpci_flags; + int vtpci_irq_type; + int vtpci_irq_rid; +#define VIRTIO_PCI_FLAG_NO_MSI 0x0001 +#define VIRTIO_PCI_FLAG_MSI 0x0002 +#define VIRTIO_PCI_FLAG_NO_MSIX 0x0010 +#define VIRTIO_PCI_FLAG_MSIX 0x0020 +#define VIRTIO_PCI_FLAG_SHARED_MSIX 0x0040 + + device_t vtpci_child_dev; + struct virtio_feature_desc *vtpci_child_feat_desc; + + /* + * Ideally, each virtqueue that the driver provides a callback for + * will receive its own MSIX vector. 
If there are not sufficient + * vectors available, we will then attempt to have all the VQs + * share one vector. Note that when using MSIX, the configuration + * changed notifications must be on their own vector. + * + * If MSIX is not available, we will attempt to have the whole + * device share one MSI vector, and then, finally, one legacy + * interrupt. + */ + int vtpci_nvqs; + struct vtpci_virtqueue { + struct virtqueue *vq; + + /* Index into vtpci_intr_res[] below. Unused, then -1. */ + int ires_idx; + } vtpci_vqx[VIRTIO_MAX_VIRTQUEUES]; + + /* + * When using MSIX interrupts, the first element of vtpci_intr_res[] + * is always the configuration changed notifications. The remaining + * element(s) are used for the virtqueues. + * + * With MSI and legacy interrupts, only the first element of + * vtpci_intr_res[] is used. + */ + int vtpci_nintr_res; + struct vtpci_intr_resource { + struct resource *irq; + int rid; + void *intrhand; + } vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES]; +}; + +static int vtpci_probe(device_t); +static int vtpci_attach(device_t); +static int vtpci_detach(device_t); +static int vtpci_suspend(device_t); +static int vtpci_resume(device_t); +static int vtpci_shutdown(device_t); +static void vtpci_driver_added(device_t, driver_t *); +static void vtpci_child_detached(device_t, device_t); +static int vtpci_read_ivar(device_t, device_t, int, uintptr_t *); +static int vtpci_write_ivar(device_t, device_t, int, uintptr_t); + +static uint64_t vtpci_negotiate_features(device_t, uint64_t); +static int vtpci_with_feature(device_t, uint64_t); +static int vtpci_alloc_virtqueues(device_t, int, int, + struct vq_alloc_info *); +static int vtpci_setup_intr(device_t); +static void vtpci_stop(device_t); +static int vtpci_reinit(device_t, uint64_t); +static void vtpci_reinit_complete(device_t); +static void vtpci_notify_virtqueue(device_t, uint16_t); +static uint8_t vtpci_get_status(device_t); +static void vtpci_set_status(device_t, uint8_t); +static void 
vtpci_read_dev_config(device_t, bus_size_t, void *, int); +static void vtpci_write_dev_config(device_t, bus_size_t, void *, int); + +static void vtpci_describe_features(struct vtpci_softc *, const char *, + uint64_t); +static void vtpci_probe_and_attach_child(struct vtpci_softc *); + +static int vtpci_alloc_interrupts(struct vtpci_softc *, int, int, + struct vq_alloc_info *); +static int vtpci_alloc_intr_resources(struct vtpci_softc *, int, + struct vq_alloc_info *); +static int vtpci_alloc_msi(struct vtpci_softc *); +static int vtpci_alloc_msix(struct vtpci_softc *, int); +static int vtpci_register_msix_vector(struct vtpci_softc *, int, int); + +static void vtpci_free_interrupts(struct vtpci_softc *); +static void vtpci_free_virtqueues(struct vtpci_softc *); +static void vtpci_release_child_resources(struct vtpci_softc *); +static void vtpci_reset(struct vtpci_softc *); + +static int vtpci_legacy_intr(void *); +static int vtpci_vq_shared_intr(void *); +static int vtpci_vq_intr(void *); +static int vtpci_config_intr(void *); + +/* + * I/O port read/write wrappers. + */ +#define vtpci_read_config_1(sc, o) bus_read_1((sc)->vtpci_res, (o)) +#define vtpci_read_config_2(sc, o) bus_read_2((sc)->vtpci_res, (o)) +#define vtpci_read_config_4(sc, o) bus_read_4((sc)->vtpci_res, (o)) +#define vtpci_write_config_1(sc, o, v) bus_write_1((sc)->vtpci_res, (o), (v)) +#define vtpci_write_config_2(sc, o, v) bus_write_2((sc)->vtpci_res, (o), (v)) +#define vtpci_write_config_4(sc, o, v) bus_write_4((sc)->vtpci_res, (o), (v)) + +/* Tunables. */ +static int vtpci_disable_msix = 0; +TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix); + +static device_method_t vtpci_methods[] = { + /* Device interface. */ + DEVMETHOD(device_probe, vtpci_probe), + DEVMETHOD(device_attach, vtpci_attach), + DEVMETHOD(device_detach, vtpci_detach), + DEVMETHOD(device_suspend, vtpci_suspend), + DEVMETHOD(device_resume, vtpci_resume), + DEVMETHOD(device_shutdown, vtpci_shutdown), + + /* Bus interface. 
*/ + DEVMETHOD(bus_driver_added, vtpci_driver_added), + DEVMETHOD(bus_child_detached, vtpci_child_detached), + DEVMETHOD(bus_read_ivar, vtpci_read_ivar), + DEVMETHOD(bus_write_ivar, vtpci_write_ivar), + + /* VirtIO bus interface. */ + DEVMETHOD(virtio_bus_negotiate_features, vtpci_negotiate_features), + DEVMETHOD(virtio_bus_with_feature, vtpci_with_feature), + DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_alloc_virtqueues), + DEVMETHOD(virtio_bus_setup_intr, vtpci_setup_intr), + DEVMETHOD(virtio_bus_stop, vtpci_stop), + DEVMETHOD(virtio_bus_reinit, vtpci_reinit), + DEVMETHOD(virtio_bus_reinit_complete, vtpci_reinit_complete), + DEVMETHOD(virtio_bus_notify_vq, vtpci_notify_virtqueue), + DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config), + DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config), + + { 0, 0 } +}; + +static driver_t vtpci_driver = { + "virtio_pci", + vtpci_methods, + sizeof(struct vtpci_softc) +}; + +devclass_t vtpci_devclass; + +DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0); +MODULE_VERSION(virtio_pci, 1); +MODULE_DEPEND(virtio_pci, pci, 1, 1, 1); +MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1); + +static int +vtpci_probe(device_t dev) +{ + char desc[36]; + const char *name; + + if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) + return (ENXIO); + + if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || + pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX) + return (ENXIO); + + if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION) + return (ENXIO); + + name = virtio_device_name(pci_get_subdevice(dev)); + if (name == NULL) + name = "Unknown"; + + ksnprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name); + device_set_desc_copy(dev, desc); + + return (BUS_PROBE_DEFAULT); +} + +static int +vtpci_attach(device_t dev) +{ + struct vtpci_softc *sc; + device_t child; + int rid; + + sc = device_get_softc(dev); + sc->vtpci_dev = dev; + + pci_enable_busmaster(dev); + + rid = PCIR_BAR(0); + sc->vtpci_res = bus_alloc_resource_any(dev, 
SYS_RES_IOPORT, &rid, + RF_ACTIVE); + if (sc->vtpci_res == NULL) { + device_printf(dev, "cannot map I/O space\n"); + return (ENXIO); + } + + if (pci_find_extcap(dev, PCIY_MSI, NULL) != 0) + sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI; + /* XXX(vsrinivas): Check out how to get MSI-X */ +#if OLD_MSI + if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) { + rid = PCIR_BAR(1); + sc->vtpci_msix_res = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); + } +#endif + if (sc->vtpci_msix_res == NULL) + sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX; + + vtpci_reset(sc); + + /* Tell the host we've noticed this device. */ + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); + + if ((child = device_add_child(dev, NULL, -1)) == NULL) { + device_printf(dev, "cannot create child device\n"); + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); + vtpci_detach(dev); + return (ENOMEM); + } + + sc->vtpci_child_dev = child; + vtpci_probe_and_attach_child(sc); + + return (0); +} + +static int +vtpci_detach(device_t dev) +{ + struct vtpci_softc *sc; + device_t child; + int error; + + sc = device_get_softc(dev); + + if ((child = sc->vtpci_child_dev) != NULL) { + error = device_delete_child(dev, child); + if (error) + return (error); + sc->vtpci_child_dev = NULL; + } + + vtpci_reset(sc); + + if (sc->vtpci_msix_res != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1), + sc->vtpci_msix_res); + sc->vtpci_msix_res = NULL; + } + + if (sc->vtpci_res != NULL) { + bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), + sc->vtpci_res); + sc->vtpci_res = NULL; + } + + return (0); +} + +static int +vtpci_suspend(device_t dev) +{ + + return (bus_generic_suspend(dev)); +} + +static int +vtpci_resume(device_t dev) +{ + + return (bus_generic_resume(dev)); +} + +static int +vtpci_shutdown(device_t dev) +{ + + (void) bus_generic_shutdown(dev); + /* Forcibly stop the host device. 
*/ + vtpci_stop(dev); + + return (0); +} + +static void +vtpci_driver_added(device_t dev, driver_t *driver) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + vtpci_probe_and_attach_child(sc); +} + +static void +vtpci_child_detached(device_t dev, device_t child) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + vtpci_reset(sc); + vtpci_release_child_resources(sc); +} + +static int +vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + if (sc->vtpci_child_dev != child) + return (ENOENT); + + switch (index) { + case VIRTIO_IVAR_DEVTYPE: + *result = pci_get_subdevice(dev); + break; + default: + return (ENOENT); + } + + return (0); +} + +static int +vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + if (sc->vtpci_child_dev != child) + return (ENOENT); + + switch (index) { + case VIRTIO_IVAR_FEATURE_DESC: + sc->vtpci_child_feat_desc = (void *) value; + break; + default: + return (ENOENT); + } + + return (0); +} + +static uint64_t +vtpci_negotiate_features(device_t dev, uint64_t child_features) +{ + struct vtpci_softc *sc; + uint64_t host_features, features; + + sc = device_get_softc(dev); + + host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES); + vtpci_describe_features(sc, "host", host_features); + + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. 
+ */ + features = host_features & child_features; + features = virtqueue_filter_features(features); + sc->vtpci_features = features; + + vtpci_describe_features(sc, "negotiated", features); + vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features); + + return (features); +} + +static int +vtpci_with_feature(device_t dev, uint64_t feature) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + return ((sc->vtpci_features & feature) != 0); +} + +static int +vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs, + struct vq_alloc_info *vq_info) +{ + struct vtpci_softc *sc; + struct vtpci_virtqueue *vqx; + struct vq_alloc_info *info; + int queue, error; + uint16_t vq_size; + + sc = device_get_softc(dev); + + if (sc->vtpci_nvqs != 0 || nvqs <= 0 || + nvqs > VIRTIO_MAX_VIRTQUEUES) + return (EINVAL); + + error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info); + if (error) { + device_printf(dev, "cannot allocate interrupts\n"); + return (error); + } + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { + error = vtpci_register_msix_vector(sc, + VIRTIO_MSI_CONFIG_VECTOR, 0); + if (error) + return (error); + } + + for (queue = 0; queue < nvqs; queue++) { + vqx = &sc->vtpci_vqx[queue]; + info = &vq_info[queue]; + + vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); + + vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); + error = virtqueue_alloc(dev, queue, vq_size, + VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq); + if (error) + return (error); + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { + error = vtpci_register_msix_vector(sc, + VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); + if (error) + return (error); + } + + vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, + virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + + *info->vqai_vq = vqx->vq; + sc->vtpci_nvqs++; + } + + return (0); +} + +static int +vtpci_setup_intr(device_t dev) +{ + struct vtpci_softc *sc; + struct vtpci_intr_resource *ires; + struct vtpci_virtqueue *vqx; + int i, 
flags, error; + + sc = device_get_softc(dev); + flags = INTR_MPSAFE; + ires = &sc->vtpci_intr_res[0]; + + if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) { + error = bus_setup_intr(dev, ires->irq, flags, + (driver_intr_t *) vtpci_legacy_intr, sc, &ires->intrhand, NULL); + + return (error); + } + + error = bus_setup_intr(dev, ires->irq, flags,(driver_intr_t *) vtpci_config_intr, + sc, &ires->intrhand, NULL); + if (error) + return (error); + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) { + ires = &sc->vtpci_intr_res[1]; + error = bus_setup_intr(dev, ires->irq, flags, + (driver_intr_t *) vtpci_vq_shared_intr, sc, &ires->intrhand, NULL); + + return (error); + } + + /* Setup an interrupt handler for each virtqueue. */ + for (i = 0; i < sc->vtpci_nvqs; i++) { + vqx = &sc->vtpci_vqx[i]; + if (vqx->ires_idx < 1) + continue; + + ires = &sc->vtpci_intr_res[vqx->ires_idx]; + error = bus_setup_intr(dev, ires->irq, flags, + (driver_intr_t *) vtpci_vq_intr, vqx->vq, &ires->intrhand, NULL); + if (error) + return (error); + } + + return (0); +} + +static void +vtpci_stop(device_t dev) +{ + + vtpci_reset(device_get_softc(dev)); +} + +static int +vtpci_reinit(device_t dev, uint64_t features) +{ + struct vtpci_softc *sc; + struct vtpci_virtqueue *vqx; + struct virtqueue *vq; + int queue, error; + uint16_t vq_size; + + sc = device_get_softc(dev); + + /* + * Redrive the device initialization. This is a bit of an abuse + * of the specification, but both VirtualBox and QEMU/KVM seem + * to play nice. We do not allow the host device to change from + * what was originally negotiated beyond what the guest driver + * changed (MSIX state should not change, number of virtqueues + * and their size remain the same, etc). + */ + + if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET) + vtpci_stop(dev); + + /* + * Quickly drive the status through ACK and DRIVER. The device + * does not become usable again until vtpci_reinit_complete(). 
+ */ + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); + + vtpci_negotiate_features(dev, features); + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { + error = vtpci_register_msix_vector(sc, + VIRTIO_MSI_CONFIG_VECTOR, 0); + if (error) + return (error); + } + + for (queue = 0; queue < sc->vtpci_nvqs; queue++) { + vqx = &sc->vtpci_vqx[queue]; + vq = vqx->vq; + + KASSERT(vq != NULL, ("vq %d not allocated", queue)); + vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); + + vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); + error = virtqueue_reinit(vq, vq_size); + if (error) + return (error); + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { + error = vtpci_register_msix_vector(sc, + VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); + if (error) + return (error); + } + + vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, + virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + } + + return (0); +} + +static void +vtpci_reinit_complete(device_t dev) +{ + + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +static void +vtpci_notify_virtqueue(device_t dev, uint16_t queue) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue); +} + +static uint8_t +vtpci_get_status(device_t dev) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS)); +} + +static void +vtpci_set_status(device_t dev, uint8_t status) +{ + struct vtpci_softc *sc; + + sc = device_get_softc(dev); + + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= vtpci_get_status(dev); + + vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status); +} + +static void +vtpci_read_dev_config(device_t dev, bus_size_t offset, + void *dst, int length) +{ + struct vtpci_softc *sc; + bus_size_t off; + uint8_t *d; + int size; + + sc = device_get_softc(dev); + off = VIRTIO_PCI_CONFIG(sc) + offset; + + for (d = dst; length > 0; d += size, off 
+= size, length -= size) { + if (length >= 4) { + size = 4; + *(uint32_t *)d = vtpci_read_config_4(sc, off); + } else if (length >= 2) { + size = 2; + *(uint16_t *)d = vtpci_read_config_2(sc, off); + } else { + size = 1; + *d = vtpci_read_config_1(sc, off); + } + } +} + +static void +vtpci_write_dev_config(device_t dev, bus_size_t offset, + void *src, int length) +{ + struct vtpci_softc *sc; + bus_size_t off; + uint8_t *s; + int size; + + sc = device_get_softc(dev); + off = VIRTIO_PCI_CONFIG(sc) + offset; + + for (s = src; length > 0; s += size, off += size, length -= size) { + if (length >= 4) { + size = 4; + vtpci_write_config_4(sc, off, *(uint32_t *)s); + } else if (length >= 2) { + size = 2; + vtpci_write_config_2(sc, off, *(uint16_t *)s); + } else { + size = 1; + vtpci_write_config_1(sc, off, *s); + } + } +} + +static void +vtpci_describe_features(struct vtpci_softc *sc, const char *msg, + uint64_t features) +{ + device_t dev, child; + + dev = sc->vtpci_dev; + child = sc->vtpci_child_dev; + + if (device_is_attached(child) && bootverbose == 0) + return; + + virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc); +} + +static void +vtpci_probe_and_attach_child(struct vtpci_softc *sc) +{ + device_t dev, child; + + dev = sc->vtpci_dev; + child = sc->vtpci_child_dev; + + if (child == NULL) + return; + + if (device_get_state(child) != DS_NOTPRESENT) + return; + + if (device_probe_child(dev, child) != 0) + return; + + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); + if (DEVICE_ATTACH(child) != 0) { + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); + vtpci_reset(sc); + vtpci_release_child_resources(sc); + + /* Reset status for future attempt. 
*/ + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); + } else + vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +static int +vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs, + struct vq_alloc_info *vq_info) +{ + int i, nvectors, error; + + /* + * Only allocate a vector for virtqueues that are actually + * expecting an interrupt. + */ + for (nvectors = 0, i = 0; i < nvqs; i++) + if (vq_info[i].vqai_intr != NULL) + nvectors++; + + if (vtpci_disable_msix != 0 || + sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX || + flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX || + vtpci_alloc_msix(sc, nvectors) != 0) { + /* + * Use MSI interrupts if available. Otherwise, we fallback + * to legacy interrupts. + */ + if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 && + vtpci_alloc_msi(sc) == 0) + sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI; + + sc->vtpci_nintr_res = 1; + } + + error = vtpci_alloc_intr_resources(sc, nvqs, vq_info); + + return (error); +} + +static int +vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, + struct vq_alloc_info *vq_info) +{ + device_t dev; + struct resource *irq; + struct vtpci_virtqueue *vqx; + int i, rid, flags, res_idx; + + dev = sc->vtpci_dev; + flags = RF_ACTIVE; + + if ((sc->vtpci_flags & + (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) { + rid = 0; + flags |= RF_SHAREABLE; + } else + rid = 1; + + for (i = 0; i < sc->vtpci_nintr_res; i++) { + irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags); + if (irq == NULL) + return (ENXIO); + + sc->vtpci_intr_res[i].irq = irq; + sc->vtpci_intr_res[i].rid = rid++; + } + + /* + * Map the virtqueue into the correct index in vq_intr_res[]. Note the + * first index is reserved for configuration changes notifications. 
+ */ + for (i = 0, res_idx = 1; i < nvqs; i++) { + vqx = &sc->vtpci_vqx[i]; + + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { + if (vq_info[i].vqai_intr == NULL) + vqx->ires_idx = -1; + else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) + vqx->ires_idx = res_idx; + else + vqx->ires_idx = res_idx++; + } else + vqx->ires_idx = -1; + } + + return (0); +} + +static int +vtpci_alloc_msi(struct vtpci_softc *sc) +{ + device_t dev; + int nmsi, cnt; + u_int irq_flags; + + dev = sc->vtpci_dev; + nmsi = pci_msi_count(dev); + + if (nmsi < 1) + return (1); + + cnt = 1; + + sc->vtpci_irq_rid = 0; + sc->vtpci_irq_type = pci_alloc_1intr(dev, 1, + &sc->vtpci_irq_rid, &irq_flags); + + + return (1); +} + +static int +vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) +{ + /* XXX(vsrinivas): Huh? Is this how MSI-X works?*/ + /* XXX(vsrinivas): All of this was disabled... */ +#ifdef OLD_MSI + device_t dev; + int nmsix, cnt, required; + + dev = sc->vtpci_dev; + + nmsix = pci_msix_count(dev); + if (nmsix < 1) + return (1); + + /* An additional vector is needed for the config changes. */ + required = nvectors + 1; + if (nmsix >= required) { + cnt = required; + if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) + goto out; + + pci_release_msi(dev); + } + + /* Attempt shared MSIX configuration. 
*/ + required = 2; + if (nmsix >= required) { + cnt = required; + if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { + sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX; + goto out; + } + + pci_release_msi(dev); + } + + return (1); + +out: + sc->vtpci_nintr_res = required; + sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX; + + if (bootverbose) { + if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) + device_printf(dev, "using shared virtqueue MSIX\n"); + else + device_printf(dev, "using per virtqueue MSIX\n"); + } +#endif + return (0); +} + +static int +vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx) +{ + device_t dev; + uint16_t vector; + + dev = sc->vtpci_dev; + + if (offset != VIRTIO_MSI_CONFIG_VECTOR && + offset != VIRTIO_MSI_QUEUE_VECTOR) + return (EINVAL); + + if (res_idx != -1) { + /* Map from rid to host vector. */ + vector = sc->vtpci_intr_res[res_idx].rid - 1; + } else + vector = VIRTIO_MSI_NO_VECTOR; + + /* The first resource is special; make sure it is used correctly. 
*/ + if (res_idx == 0) { + KASSERT(vector == 0, ("unexpected config vector")); + KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR, + ("unexpected config offset")); + } + + vtpci_write_config_2(sc, offset, vector); + + if (vtpci_read_config_2(sc, offset) != vector) { + device_printf(dev, "insufficient host resources for " + "MSIX interrupts\n"); + return (ENODEV); + } + + return (0); +} + +static void +vtpci_free_interrupts(struct vtpci_softc *sc) +{ + device_t dev; + struct vtpci_intr_resource *ires; + int i; + + dev = sc->vtpci_dev; + sc->vtpci_nintr_res = 0; + + if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) { + pci_release_msi(dev); + sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI | + VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX); + } + + for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) { + ires = &sc->vtpci_intr_res[i]; + + if (ires->intrhand != NULL) { + bus_teardown_intr(dev, ires->irq, ires->intrhand); + ires->intrhand = NULL; + } + + if (ires->irq != NULL) { + bus_release_resource(dev, SYS_RES_IRQ, ires->rid, + ires->irq); + ires->irq = NULL; + } + + ires->rid = -1; + } +} + +static void +vtpci_free_virtqueues(struct vtpci_softc *sc) +{ + struct vtpci_virtqueue *vqx; + int i; + + sc->vtpci_nvqs = 0; + + for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) { + vqx = &sc->vtpci_vqx[i]; + + if (vqx->vq != NULL) { + virtqueue_free(vqx->vq); + vqx->vq = NULL; + } + } +} + +static void +vtpci_release_child_resources(struct vtpci_softc *sc) +{ + + vtpci_free_interrupts(sc); + vtpci_free_virtqueues(sc); +} + +static void +vtpci_reset(struct vtpci_softc *sc) +{ + + /* + * Setting the status to RESET sets the host device to + * the original, uninitialized state. + */ + vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET); +} + +static int +vtpci_legacy_intr(void *xsc) +{ + struct vtpci_softc *sc; + struct vtpci_virtqueue *vqx; + int i; + uint8_t isr; + + sc = xsc; + vqx = &sc->vtpci_vqx[0]; + + /* Reading the ISR also clears it. 
*/ + isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR); + + if (isr & VIRTIO_PCI_ISR_CONFIG) + vtpci_config_intr(sc); + + if (isr & VIRTIO_PCI_ISR_INTR) + for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) + virtqueue_intr(vqx->vq); + + return isr; +} + +static int +vtpci_vq_shared_intr(void *xsc) +{ + struct vtpci_softc *sc; + struct vtpci_virtqueue *vqx; + int i, rc; + + rc = 0; + sc = xsc; + vqx = &sc->vtpci_vqx[0]; + + for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) + rc |= virtqueue_intr(vqx->vq); + + return rc; +} + +static int +vtpci_vq_intr(void *xvq) +{ + struct virtqueue *vq; + int rc; + + vq = xvq; + rc = virtqueue_intr(vq); + + return rc; +} + +static int +vtpci_config_intr(void *xsc) +{ + struct vtpci_softc *sc; + device_t child; + int rc; + + rc = 0; + sc = xsc; + child = sc->vtpci_child_dev; + + if (child != NULL) + rc = VIRTIO_CONFIG_CHANGE(child); + + return rc; +} diff --git a/sys/dev/virtual/virtio/pci/virtio_pci.h b/sys/dev/virtual/virtio/pci/virtio_pci.h new file mode 100644 index 0000000000..4773146ce1 --- /dev/null +++ b/sys/dev/virtual/virtio/pci/virtio_pci.h @@ -0,0 +1,87 @@ +/*- + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.h,v 1.2 2011/12/06 06:28:32 grehan Exp $ + */ + +#ifndef _VIRTIO_PCI_H +#define _VIRTIO_PCI_H + +/* VirtIO PCI vendor/device ID. */ +#define VIRTIO_PCI_VENDORID 0x1AF4 +#define VIRTIO_PCI_DEVICEID_MIN 0x1000 +#define VIRTIO_PCI_DEVICEID_MAX 0x103F + +/* VirtIO ABI version, this must match exactly. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0. 
+ */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +/* + * The remaining space is defined by each driver as the per-driver + * configuration space. + */ +#define VIRTIO_PCI_CONFIG(sc) \ + (((sc)->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) ? 24 : 20) + +/* + * How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. + */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. 
*/ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* _VIRTIO_PCI_H */ diff --git a/sys/dev/virtual/virtio/virtio/Makefile b/sys/dev/virtual/virtio/virtio/Makefile new file mode 100644 index 0000000000..7071ce1404 --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/Makefile @@ -0,0 +1,10 @@ +KMOD= virtio + +SRCS= virtio.c virtqueue.c virtio_if.h virtio_bus_if.h device_if.h bus_if.h\ + virtio_bus_if.c virtio_if.c + +MFILES= kern/bus_if.m kern/device_if.m +MFILES+=dev/virtual/virtio/virtio/virtio_if.m +MFILES+=dev/virtual/virtio/virtio/virtio_bus_if.m + +.include diff --git a/sys/dev/virtual/virtio/virtio/virtio.c b/sys/dev/virtual/virtio/virtio/virtio.c new file mode 100644 index 0000000000..ec8fcb3020 --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtio.c @@ -0,0 +1,281 @@ +/*- + * Copyright (c) 2011, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/virtio.c,v 1.1 2011/11/18 05:43:43 grehan Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "virtio.h" +#include "virtqueue.h" + +#include "virtio_if.h" +#include "virtio_bus_if.h" + +static int virtio_modevent(module_t, int, void *); +static const char *virtio_feature_name(uint64_t, struct virtio_feature_desc *); + +static struct virtio_ident { + uint16_t devid; + char *name; +} virtio_ident_table[] = { + { VIRTIO_ID_NETWORK, "Network" }, + { VIRTIO_ID_BLOCK, "Block" }, + { VIRTIO_ID_CONSOLE, "Console" }, + { VIRTIO_ID_ENTROPY, "Entropy" }, + { VIRTIO_ID_BALLOON, "Balloon" }, + { VIRTIO_ID_IOMEMORY, "IOMemory" }, + { VIRTIO_ID_9P, "9P Transport" }, + + { 0, NULL } +}; + +/* Device independent features. 
*/ +static struct virtio_feature_desc virtio_common_feature_desc[] = { + { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty" }, + { VIRTIO_RING_F_EVENT_IDX, "EventIdx" }, + { VIRTIO_F_BAD_FEATURE, "BadFeature" }, + + { 0, NULL } +}; + +const char * +virtio_device_name(uint16_t devid) +{ + struct virtio_ident *ident; + + for (ident = virtio_ident_table; ident->name != NULL; ident++) { + if (ident->devid == devid) + return (ident->name); + } + + return (NULL); +} + +int +virtio_get_device_type(device_t dev) +{ + uintptr_t devtype; + + devtype = -1; + + BUS_READ_IVAR(device_get_parent(dev), dev, + VIRTIO_IVAR_DEVTYPE, &devtype); + + return ((int) devtype); +} + +void +virtio_set_feature_desc(device_t dev, + struct virtio_feature_desc *feature_desc) +{ + + BUS_WRITE_IVAR(device_get_parent(dev), dev, + VIRTIO_IVAR_FEATURE_DESC, (uintptr_t) feature_desc); +} + +/* XXX(vsrinivas): Hmm, check this SBUF usage */ +void +virtio_describe(device_t dev, const char *msg, + uint64_t features, struct virtio_feature_desc *feature_desc) +{ + struct sbuf sb; + uint64_t val; + char *buf; + const char *name; + int n; + + if ((buf = kmalloc(512, M_TEMP, M_NOWAIT)) == NULL) { + device_printf(dev, "%s features: 0x%"PRIx64"\n", msg, + features); + return; + } + + sbuf_new(&sb, buf, 512, SBUF_FIXEDLEN); + sbuf_printf(&sb, "%s features: 0x%"PRIx64, msg, features); + + for (n = 0, val = 1ULL << 63; val != 0; val >>= 1) { + /* + * BAD_FEATURE is used to detect broken Linux clients + * and therefore is not applicable to FreeBSD. 
+ */ + if (((features & val) == 0) || val == VIRTIO_F_BAD_FEATURE) + continue; + + if (n++ == 0) + sbuf_cat(&sb, " <"); + else + sbuf_cat(&sb, ","); + + name = NULL; + if (feature_desc != NULL) + name = virtio_feature_name(val, feature_desc); + if (name == NULL) + name = virtio_feature_name(val, + virtio_common_feature_desc); + + if (name == NULL) + sbuf_printf(&sb, "0x%"PRIx64, val); + else + sbuf_cat(&sb, name); + } + + if (n > 0) + sbuf_cat(&sb, ">"); + +#if __FreeBSD_version < 900020 + sbuf_finish(&sb); + if (sbuf_overflowed(&sb) == 0) +#else + if (sbuf_finish(&sb) == 0) +#endif + device_printf(dev, "%s\n", sbuf_data(&sb)); + + sbuf_delete(&sb); + kfree(buf, M_TEMP); +} + +static const char * +virtio_feature_name(uint64_t val, struct virtio_feature_desc *feature_desc) +{ + int i; + + for (i = 0; feature_desc[i].vfd_val != 0; i++) + if (val == feature_desc[i].vfd_val) + return (feature_desc[i].vfd_str); + + return (NULL); +} + +/* + * VirtIO bus method wrappers. + */ + +uint64_t +virtio_negotiate_features(device_t dev, uint64_t child_features) +{ + + return (VIRTIO_BUS_NEGOTIATE_FEATURES(device_get_parent(dev), + child_features)); +} + +int +virtio_alloc_virtqueues(device_t dev, int flags, int nvqs, + struct vq_alloc_info *info) +{ + + return (VIRTIO_BUS_ALLOC_VIRTQUEUES(device_get_parent(dev), flags, + nvqs, info)); +} + +int +virtio_setup_intr(device_t dev) +{ + + return (VIRTIO_BUS_SETUP_INTR(device_get_parent(dev))); +} + +int +virtio_with_feature(device_t dev, uint64_t feature) +{ + + return (VIRTIO_BUS_WITH_FEATURE(device_get_parent(dev), feature)); +} + +void +virtio_stop(device_t dev) +{ + + VIRTIO_BUS_STOP(device_get_parent(dev)); +} + +int +virtio_reinit(device_t dev, uint64_t features) +{ + + return (VIRTIO_BUS_REINIT(device_get_parent(dev), features)); +} + +void +virtio_reinit_complete(device_t dev) +{ + + VIRTIO_BUS_REINIT_COMPLETE(device_get_parent(dev)); +} + +void +virtio_read_device_config(device_t dev, bus_size_t offset, void *dst, int len) +{ 
+ + VIRTIO_BUS_READ_DEVICE_CONFIG(device_get_parent(dev), + offset, dst, len); +} + +void +virtio_write_device_config(device_t dev, bus_size_t offset, void *dst, int len) +{ + + VIRTIO_BUS_WRITE_DEVICE_CONFIG(device_get_parent(dev), + offset, dst, len); +} + +static int +virtio_modevent(module_t mod, int type, void *unused) +{ + int error; + + error = 0; + + switch (type) { + case MOD_LOAD: + //case MOD_QUIESCE: + case MOD_UNLOAD: + case MOD_SHUTDOWN: + break; + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +static moduledata_t virtio_mod = { + "virtio", + virtio_modevent, + 0 +}; + +DECLARE_MODULE(virtio, virtio_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); +MODULE_VERSION(virtio, 1); diff --git a/sys/dev/virtual/virtio/virtio/virtio.h b/sys/dev/virtual/virtio/virtio/virtio.h new file mode 100644 index 0000000000..f859e19453 --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtio.h @@ -0,0 +1,141 @@ +/*- + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/virtio.h,v 1.2 2011/12/06 06:28:32 grehan Exp $ + */ + +#ifndef _VIRTIO_H_ +#define _VIRTIO_H_ + +#include + +struct vq_alloc_info; + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 +#define VIRTIO_ID_BLOCK 0x02 +#define VIRTIO_ID_CONSOLE 0x03 +#define VIRTIO_ID_ENTROPY 0x04 +#define VIRTIO_ID_BALLOON 0x05 +#define VIRTIO_ID_IOMEMORY 0x06 +#define VIRTIO_ID_9P 0x09 + +/* Status byte for guest to report progress. */ +#define VIRTIO_CONFIG_STATUS_RESET 0x00 +#define VIRTIO_CONFIG_STATUS_ACK 0x01 +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +#define VIRTIO_CONFIG_STATUS_FAILED 0x80 + +/* + * Generate interrupt when the virtqueue ring is + * completely used, even if we've suppressed them. + */ +#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) + +/* + * The guest should never negotiate this feature; it + * is used to detect faulty drivers. + */ +#define VIRTIO_F_BAD_FEATURE (1 << 30) + +/* + * Some VirtIO feature bits (currently bits 28 through 31) are + * reserved for the transport being used (eg. virtio_ring), the + * rest are per-device feature bits. 
+ */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + +/* + * Maximum number of virtqueues per device. + */ +#define VIRTIO_MAX_VIRTQUEUES 8 + +/* + * VirtIO instance variables indices. + */ +#define VIRTIO_IVAR_DEVTYPE 1 +#define VIRTIO_IVAR_FEATURE_DESC 2 + +struct virtio_feature_desc { + uint64_t vfd_val; + char *vfd_str; +}; + +const char *virtio_device_name(uint16_t devid); +int virtio_get_device_type(device_t dev); +void virtio_set_feature_desc(device_t dev, + struct virtio_feature_desc *feature_desc); +void virtio_describe(device_t dev, const char *msg, + uint64_t features, struct virtio_feature_desc *feature_desc); + +/* + * VirtIO Bus Methods. + */ +uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features); +int virtio_alloc_virtqueues(device_t dev, int flags, int nvqs, + struct vq_alloc_info *info); +int virtio_setup_intr(device_t dev); +int virtio_with_feature(device_t dev, uint64_t feature); +void virtio_stop(device_t dev); +int virtio_reinit(device_t dev, uint64_t features); +void virtio_reinit_complete(device_t dev); + +/* + * Read/write a variable amount from the device specific (ie, network) + * configuration region. This region is encoded in the same endian as + * the guest. + */ +void virtio_read_device_config(device_t dev, bus_size_t offset, + void *dst, int length); +void virtio_write_device_config(device_t dev, bus_size_t offset, + void *src, int length); + +/* Inlined device specific read/write functions for common lengths. 
*/ +#define VIRTIO_RDWR_DEVICE_CONFIG(size, type) \ +static inline type \ +__CONCAT(virtio_read_dev_config_,size)(device_t dev, \ + bus_size_t offset) \ +{ \ + type val; \ + virtio_read_device_config(dev, offset, &val, sizeof(type)); \ + return (val); \ +} \ + \ +static inline void \ +__CONCAT(virtio_write_dev_config_,size)(device_t dev, \ + bus_size_t offset, type val) \ +{ \ + virtio_write_device_config(dev, offset, &val, sizeof(type)); \ +} + +VIRTIO_RDWR_DEVICE_CONFIG(1, uint8_t); +VIRTIO_RDWR_DEVICE_CONFIG(2, uint16_t); +VIRTIO_RDWR_DEVICE_CONFIG(4, uint32_t); + +#endif /* _VIRTIO_H_ */ diff --git a/sys/dev/virtual/virtio/virtio/virtio_bus_if.m b/sys/dev/virtual/virtio/virtio/virtio_bus_if.m new file mode 100644 index 0000000000..b9a387258e --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtio_bus_if.m @@ -0,0 +1,90 @@ +#- +# Copyright (c) 2011, Bryan Venteicher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD: src/sys/dev/virtio/virtio_bus_if.m,v 1.1 2011/11/18 05:43:43 grehan Exp $ + +#include + +INTERFACE virtio_bus; + +HEADER { +struct vq_alloc_info; +}; + +METHOD uint64_t negotiate_features { + device_t dev; + uint64_t child_features; +}; + +METHOD int with_feature { + device_t dev; + uint64_t feature; +}; + +METHOD int alloc_virtqueues { + device_t dev; + int flags; + int nvqs; + struct vq_alloc_info *info; +}; +HEADER { +#define VIRTIO_ALLOC_VQS_DISABLE_MSIX 0x1 +}; + +METHOD int setup_intr { + device_t dev; +}; + +METHOD void stop { + device_t dev; +}; + +METHOD int reinit { + device_t dev; + uint64_t features; +}; + +METHOD void reinit_complete { + device_t dev; +}; + +METHOD void notify_vq { + device_t dev; + uint16_t queue; +}; + +METHOD void read_device_config { + device_t dev; + bus_size_t offset; + void *dst; + int len; +}; + +METHOD void write_device_config { + device_t dev; + bus_size_t offset; + void *src; + int len; +}; diff --git a/sys/dev/virtual/virtio/virtio/virtio_if.m b/sys/dev/virtual/virtio/virtio/virtio_if.m new file mode 100644 index 0000000000..103c8a905f --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtio_if.m @@ -0,0 +1,43 @@ +#- +# Copyright (c) 2011, Bryan Venteicher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD: src/sys/dev/virtio/virtio_if.m,v 1.1 2011/11/18 05:43:43 grehan Exp $ + +#include + +INTERFACE virtio; + +CODE { + static int + virtio_default_config_change(device_t dev) + { + /* Return that we've handled the change. */ + return (1); + } +}; + +METHOD int config_change { + device_t dev; +} DEFAULT virtio_default_config_change; diff --git a/sys/dev/virtual/virtio/virtio/virtio_ring.h b/sys/dev/virtual/virtio/virtio/virtio_ring.h new file mode 100644 index 0000000000..df0d94d8a9 --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtio_ring.h @@ -0,0 +1,164 @@ +/*- + * Copyright Rusty Russell IBM Corporation 2007. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/virtio_ring.h,v 1.3 2012/04/14 05:48:04 grehan Exp $ + */ + +#ifndef VIRTIO_RING_H +#define VIRTIO_RING_H + +#include + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 + +/* The Host uses this in used->flags to advise the Guest: don't kick me + * when you add a buffer. It's unreliable, so it's simply an + * optimization. 
Guest will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't + * interrupt me when you consume a buffer. It's unreliable, so it's + * simply an optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* VirtIO ring descriptors: 16 bytes. + * These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + uint64_t addr; + /* Length. */ + uint32_t len; + /* The flags as indicated above. */ + uint16_t flags; + /* We chain unused descriptors via this, too. */ + uint16_t next; +}; + +struct vring_avail { + uint16_t flags; + uint16_t idx; + uint16_t ring[0]; +}; + +/* uint32_t is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was written to. */ + uint32_t len; +}; + +struct vring_used { + uint16_t flags; + uint16_t idx; + struct vring_used_elem ring[0]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +/* The standard layout for the ring is a continuous chunk of memory which + * looks like this. We assume num is a power of 2. + * + * struct vring { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * __u16 avail_flags; + * __u16 avail_idx; + * __u16 available[num]; + * __u16 used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __u16 used_flags; + * __u16 used_idx; + * struct vring_used_elem used[num]; + * __u16 avail_event_idx; + * }; + * + * NOTE: for VirtIO PCI, align is 4096. + */ + +/* + * We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. 
+ */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) + +static inline int +vring_size(unsigned int num, unsigned long align) +{ + int size; + + size = num * sizeof(struct vring_desc); + size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)); + size = (size + align - 1) & ~(align - 1); + size += sizeof(struct vring_used) + + (num * sizeof(struct vring_used_elem)); + return (size); +} + +static inline void +vring_init(struct vring *vr, unsigned int num, uint8_t *p, + unsigned long align) +{ + vr->num = num; + vr->desc = (struct vring_desc *) p; + vr->avail = (struct vring_avail *) (p + + num * sizeof(struct vring_desc)); + vr->used = (void *) + (((unsigned long) &vr->avail->ring[num] + align-1) & ~(align-1)); +} + +/* + * The following is used with VIRTIO_RING_F_EVENT_IDX. + * + * Assuming a given event_idx value from the other side, if we have + * just incremented index from old to new_idx, should we trigger an + * event? + */ +static inline int +vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} +#endif /* VIRTIO_RING_H */ diff --git a/sys/dev/virtual/virtio/virtio/virtqueue.c b/sys/dev/virtual/virtio/virtio/virtqueue.c new file mode 100644 index 0000000000..511efa3795 --- /dev/null +++ b/sys/dev/virtual/virtio/virtio/virtqueue.c @@ -0,0 +1,639 @@ +/*- + * Copyright (c) 2011, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/dev/virtio/virtqueue.c,v 1.2 2012/04/14 05:48:04 grehan Exp $ + */ + +/* + * Implements the virtqueue interface as basically described + * in the original VirtIO paper. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "virtio.h" +#include "virtqueue.h" +#include "virtio_ring.h" + +#include "virtio_bus_if.h" + +struct virtqueue { + device_t vq_dev; + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + uint16_t vq_queue_index; + uint16_t vq_nentries; + uint32_t vq_flags; + +#define VIRTQUEUE_FLAG_EVENT_IDX 0x0002 + + int vq_alignment; + int vq_ring_size; + void *vq_ring_mem; + + virtqueue_intr_t *vq_intrhand; + void *vq_intrhand_arg; + + struct vring vq_ring; + uint16_t vq_free_cnt; + uint16_t vq_queued_cnt; + /* + * Head of the free chain in the descriptor table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. 
+ */ + uint16_t vq_desc_head_idx; + /* + * Last consumed descriptor in the used table, + * trails vq_ring.used->idx. + */ + uint16_t vq_used_cons_idx; + + struct vq_desc_extra { + void *cookie; + uint16_t ndescs; + } vq_descx[0]; +}; + +/* + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +#define VQASSERT(_vq, _exp, _msg, ...) \ + KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name, \ + ##__VA_ARGS__)) + +#define VQ_RING_ASSERT_VALID_IDX(_vq, _idx) \ + VQASSERT((_vq), (_idx) < (_vq)->vq_nentries, \ + "invalid ring index: %d, max: %d", (_idx), \ + (_vq)->vq_nentries) + +#define VQ_RING_ASSERT_CHAIN_TERM(_vq) \ + VQASSERT((_vq), (_vq)->vq_desc_head_idx == \ + VQ_RING_DESC_CHAIN_END, "full ring terminated " \ + "incorrectly: head idx: %d", (_vq)->vq_desc_head_idx) + +static void vq_ring_init(struct virtqueue *); +static void vq_ring_update_avail(struct virtqueue *, uint16_t); +static uint16_t vq_ring_enqueue_segments(struct virtqueue *, + struct vring_desc *, uint16_t, struct sglist *, int, int); +static int vq_ring_must_notify_host(struct virtqueue *); +static void vq_ring_notify_host(struct virtqueue *); +static void vq_ring_free_chain(struct virtqueue *, uint16_t); + +uint64_t +virtqueue_filter_features(uint64_t features) +{ + uint64_t mask; + + mask = (1 << VIRTIO_TRANSPORT_F_START) - 1; + mask |= VIRTIO_RING_F_EVENT_IDX; + + return (features & mask); +} + +int +virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align, + vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp) +{ + struct virtqueue *vq; + int error; + + *vqp = NULL; + error = 0; + + if (size == 0) { + device_printf(dev, + "virtqueue %d (%s) does not exist (size is zero)\n", + queue, info->vqai_name); + return (ENODEV); + } else if 
(!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	}

	/* One vq_desc_extra bookkeeping slot per ring entry. */
	vq = kmalloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	/* Ring memory must be physically contiguous and page aligned. */
	vq->vq_ring_size = round_page(vring_size(size, align));
	vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
	if (vq->vq_ring_mem == NULL) {
		device_printf(dev,
		    "cannot allocate memory for virtqueue ring\n");
		error = ENOMEM;
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	/* On the success path error is 0, so this is a no-op. */
	if (error)
		virtqueue_free(vq);

	return (error);
}

/*
 * Re-initialize an existing virtqueue after a device reset.  The size
 * must match the original allocation; the ring memory is zeroed and all
 * bookkeeping reset as if freshly allocated.  Returns 0 or EINVAL.
 */
int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning, '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory.
 */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

/*
 * Release a virtqueue and its ring memory.  Warns (but proceeds) if
 * descriptors are still outstanding.
 *
 * NOTE(review): the "%s:" here receives the queue name, whereas the
 * sibling messages in virtqueue_reinit() pass __func__ for "%s:" and the
 * queue name separately -- inconsistent, though not a format/arg
 * mismatch.
 */
void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_ring_mem != NULL) {
		contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
		vq->vq_ring_size = 0;
		vq->vq_ring_mem = NULL;
	}

	kfree(vq, M_DEVBUF);
}

/* Physical address of the shared ring, for programming the device. */
vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{
	return (vtophys(vq->vq_ring_mem));
}

/* Number of entries in the ring. */
int
virtqueue_size(struct virtqueue *vq)
{
	return (vq->vq_nentries);
}

/* True when no descriptor chains are outstanding. */
int
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

/* True when no free descriptors remain. */
int
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}

/*
 * Kick the host if it expects a notification for the chains queued
 * since the last notify.  The caller's interlock is dropped around the
 * (potentially trapping) notify so it is not held across the VM exit;
 * 'interlock' must be held on entry and is held again on return.
 */
void
virtqueue_notify(struct virtqueue *vq, struct spinlock *interlock)
{
	/* Ensure updated avail->idx is visible to host. */
	cpu_mfence();

	if (vq_ring_must_notify_host(vq)) {
		spin_unlock(interlock);
		vq_ring_notify_host(vq);
		spin_lock(interlock);
	}
	vq->vq_queued_cnt = 0;
}

/*
 * Number of completed chains the host has placed in the used ring that
 * we have not yet dequeued.  Relies on 16-bit wraparound arithmetic.
 */
int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	used_idx = vq->vq_ring.used->idx;
	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

/*
 * Interrupt dispatch: invoke the queue's callback if one is registered
 * and there is at least one unconsumed used-ring entry.  Returns 1 if
 * the callback ran, 0 for a spurious interrupt.
 */
int
virtqueue_intr(struct virtqueue *vq)
{

	if (vq->vq_intrhand == NULL ||
	    vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (0);

	vq->vq_intrhand(vq->vq_intrhand_arg);

	return (1);
}

/*
 * Enable interrupts on a given virtqueue. Returns 1 if there are
 * additional entries to process on the virtqueue after we return.
+ */ +int +virtqueue_enable_intr(struct virtqueue *vq) +{ + /* + * Enable interrupts, making sure we get the latest + * index of what's already been consumed. + */ + vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { + vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx; + } else { + vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + } + + cpu_mfence(); + + /* + * Additional items may have been consumed in the time between + * since we last checked and enabled interrupts above. Let our + * caller know so it processes the new entries. + */ + if (vq->vq_used_cons_idx != vq->vq_ring.used->idx) + return (1); + + return (0); +} + +int +virtqueue_postpone_intr(struct virtqueue *vq) +{ + uint16_t ndesc; + + /* + * Postpone until at least half of the available descriptors + * have been consumed. + * + * XXX Adaptive factor? (Linux uses 3/4) + */ + ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2; + + if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) + vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc; + else + vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + + cpu_mfence(); + + /* + * Enough items may have already been consumed to meet our + * threshold since we last checked. Let our caller know so + * it processes the new entries. + */ + if (virtqueue_nused(vq) > ndesc) + return (1); + + return (0); +} + +void +virtqueue_disable_intr(struct virtqueue *vq) +{ + /* + * Note this is only considered a hint to the host. 
 */
	if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

/*
 * Post the buffer described by 'sg' as one descriptor chain: the first
 * 'readable' segments are device-readable, the next 'writable' segments
 * device-writable.  'cookie' is the caller's token returned later by
 * virtqueue_dequeue().  Returns 0, EINVAL (empty sglist), ENOSPC (ring
 * full) or EMSGSIZE (not enough free descriptors).  Caller provides
 * serialization.
 */
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);
	if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	/* Chain bookkeeping is indexed by the chain's head descriptor. */
	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	/* Fill the descriptors; returns the next free head index. */
	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	/* Expose the chain to the host via the avail ring. */
	vq_ring_update_avail(vq, head_idx);

	return (0);
}

/*
 * Pull the next completed chain off the used ring.  Returns the cookie
 * passed to virtqueue_enqueue() and, if 'len' is non-NULL, the number of
 * bytes the device wrote; NULL if nothing has completed.  The chain's
 * descriptors are returned to the free list.  Caller provides
 * serialization.
 */
void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	/* Fence before reading the element the host just published. */
	cpu_mfence();
	desc_idx = (uint16_t) uep->id;
	if (len != NULL)
		*len = uep->len;

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

/*
 * Busy-wait until a completion is available and return it.
 */
void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	/* We only poll the virtqueue when dumping to virtio-blk */
	while ((cookie =
virtqueue_dequeue(vq, len)) == NULL)
		;

	return (cookie);
}

/*
 * Teardown helper: return the next outstanding cookie at or after
 * index *last, freeing its descriptor chain; NULL when the scan reaches
 * the end of the ring.  *last is updated so repeated calls walk the
 * whole ring exactly once.
 */
void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

/*
 * Dump the virtqueue's state to the console for debugging.
 */
void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	kprintf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
	    virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq->vq_ring.avail->idx, vq->vq_used_cons_idx,
	    vq->vq_ring.used->idx, vq->vq_ring.avail->flags,
	    vq->vq_ring.used->flags);
}

/*
 * Lay out the vring inside vq_ring_mem and link every descriptor into
 * the initial free chain.
 */
static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_alignment);

	/* Chain all descriptors together as the free list. */
	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = i + 1;
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
}

/*
 * Publish the chain headed by 'desc_idx' in the avail ring and bump
 * avail->idx so the host can see it.
 */
static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_idx] = desc_idx;

	/* Slot contents must be visible before the index increment. */
	cpu_mfence();
	vq->vq_ring.avail->idx++;

	/* Keep pending count until virtqueue_notify() for debugging.
 */
	vq->vq_queued_cnt++;
}

/*
 * Write one descriptor per sglist segment starting at 'head_idx',
 * following the free chain.  The first 'readable' segments are marked
 * device-readable, the rest get VRING_DESC_F_WRITE; all but the last
 * get VRING_DESC_F_NEXT.  Returns the index of the descriptor after
 * the chain (the new free-list head).
 */
static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = dp->next, seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = seg->ss_paddr;
		dp->len = seg->ss_len;
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= VRING_DESC_F_NEXT;
		if (i >= readable)
			dp->flags |= VRING_DESC_F_WRITE;
	}

	return (idx);
}

/*
 * Decide whether the host needs a kick for the chains queued since the
 * last notify: with EVENT_IDX, apply the vring_need_event() window test
 * against the host's published avail_event; otherwise honor the host's
 * NO_NOTIFY hint.
 */
static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq->vq_ring.avail->idx;
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vring_avail_event(&vq->vq_ring);

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
}

/* Kick the host via the bus-specific notify method. */
static void
vq_ring_notify_host(struct virtqueue *vq)
{
	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
}

/*
 * Return the chain headed by 'desc_idx' to the free list, walking the
 * VRING_DESC_F_NEXT links and verifying the count matches the ndescs
 * recorded at enqueue time.
 */
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	/* Walk to the tail of the chain, counting descriptors off. */
	while (dp->flags & VRING_DESC_F_NEXT) {
		VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
		dp = &vq->vq_ring.desc[dp->next];
		dxp->ndescs--;
	}
	VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain");

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq->vq_desc_head_idx;
	vq->vq_desc_head_idx = desc_idx;
}
diff --git a/sys/dev/virtual/virtio/virtio/virtqueue.h b/sys/dev/virtual/virtio/virtio/virtqueue.h
new file mode 100644
index 0000000000..ceaa0e62c7
--- /dev/null
+++ b/sys/dev/virtual/virtio/virtio/virtqueue.h
@@ -0,0 +1,95 @@
/*-
 * Copyright (c) 2011, Bryan Venteicher
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/dev/virtio/virtqueue.h,v 1.2 2012/04/14 05:48:04 grehan Exp $
 */

#ifndef _VIRTIO_VIRTQUEUE_H
#define _VIRTIO_VIRTQUEUE_H

/*
 * NOTE(review): the header name of this #include was lost during patch
 * extraction and is preserved as-is; confirm against the committed file.
 */
#include

struct virtqueue;
struct sglist;
struct spinlock;

/* The guest publishes the used index for which it expects an interrupt
 * at the end of the avail ring. Host should ignore the avail->flags field.
 * The host publishes the avail index for which it expects a kick
 * at the end of the used ring. Guest should ignore the used->flags field.
 */
#define VIRTIO_RING_F_EVENT_IDX	(1 << 29)

/* Device callback for a virtqueue interrupt. */
typedef int virtqueue_intr_t(void *);

#define VIRTQUEUE_MAX_NAME_SZ	32

/* One for each virtqueue the device wishes to allocate. */
struct vq_alloc_info {
	char		vqai_name[VIRTQUEUE_MAX_NAME_SZ];
	virtqueue_intr_t *vqai_intr;
	void		*vqai_intr_arg;
	struct virtqueue **vqai_vq;
};

/*
 * Initialize a vq_alloc_info with a printf-style queue name, interrupt
 * callback and result pointer.
 *
 * NOTE(review): the _nsegs parameter is accepted but never used by the
 * macro body -- apparently a leftover of the removed indirect-descriptor
 * support; kept for source compatibility with callers.
 */
#define VQ_ALLOC_INFO_INIT(_i,_nsegs,_intr,_arg,_vqp,_str,...) do {	\
	ksnprintf((_i)->vqai_name, VIRTQUEUE_MAX_NAME_SZ, _str,		\
	    ##__VA_ARGS__);						\
	(_i)->vqai_intr = (_intr);					\
	(_i)->vqai_intr_arg = (_arg);					\
	(_i)->vqai_vq = (_vqp);						\
} while (0)

uint64_t virtqueue_filter_features(uint64_t features);

int	 virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
	     int align, vm_paddr_t highaddr, struct vq_alloc_info *info,
	     struct virtqueue **vqp);
void	*virtqueue_drain(struct virtqueue *vq, int *last);
void	 virtqueue_free(struct virtqueue *vq);
int	 virtqueue_reinit(struct virtqueue *vq, uint16_t size);

int	 virtqueue_intr(struct virtqueue *vq);
int	 virtqueue_enable_intr(struct virtqueue *vq);
int	 virtqueue_postpone_intr(struct virtqueue *vq);
void	 virtqueue_disable_intr(struct virtqueue *vq);

/* Get physical address of the virtqueue ring.
 */
vm_paddr_t virtqueue_paddr(struct virtqueue *vq);

int	 virtqueue_full(struct virtqueue *vq);
int	 virtqueue_empty(struct virtqueue *vq);
int	 virtqueue_size(struct virtqueue *vq);
int	 virtqueue_nused(struct virtqueue *vq);
void	 virtqueue_notify(struct virtqueue *vq, struct spinlock *);
void	 virtqueue_dump(struct virtqueue *vq);

int	 virtqueue_enqueue(struct virtqueue *vq, void *cookie,
	     struct sglist *sg, int readable, int writable);
void	*virtqueue_dequeue(struct virtqueue *vq, uint32_t *len);
void	*virtqueue_poll(struct virtqueue *vq, uint32_t *len);

#endif /* _VIRTIO_VIRTQUEUE_H */
-- 
2.41.0