From 68b3ccd46fc416a468da709a847cefe4c5cd3cef Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 27 Aug 2010 17:12:01 -0700 Subject: [PATCH] vkernel - Fix deadlocks with cothread locks * Callbacks into the main kernel are not allowed when holding a cothread lock as this could cause a deadlock between cpus. If the callback into the kernel blocks the cothread lock, being a pthreads lock, remains locked. * Refactor the network and disk pipeline to hold the cothread lock for a much shorter period of time, allowing data to be pipelined without any stall conditions. * For vknet if the tx mbuf return fifo is full we wait until it isn't. --- sys/dev/virtual/disk/vdisk.c | 15 +- sys/dev/virtual/net/if_vke.c | 188 +++++++++++---------- sys/kern/kern_intr.c | 3 +- sys/platform/vkernel/platform/cothread.c | 19 ++- sys/platform/vkernel64/platform/cothread.c | 19 ++- 5 files changed, 138 insertions(+), 106 deletions(-) diff --git a/sys/dev/virtual/disk/vdisk.c b/sys/dev/virtual/disk/vdisk.c index 4f0291e360..70f225f132 100644 --- a/sys/dev/virtual/disk/vdisk.c +++ b/sys/dev/virtual/disk/vdisk.c @@ -187,17 +187,26 @@ vkd_io_intr(cothread_t cotd) { struct vkd_softc *sc; struct bio *bio; + TAILQ_HEAD(, bio) tmpq; sc = cotd->arg; + /* + * We can't call back into the kernel while holding cothread! + */ + TAILQ_INIT(&tmpq); cothread_lock(cotd, 0); - while (!TAILQ_EMPTY(&sc->cotd_done)) { - bio = TAILQ_FIRST(&sc->cotd_done); + while ((bio = TAILQ_FIRST(&sc->cotd_done)) != NULL) { TAILQ_REMOVE(&sc->cotd_done, bio, bio_act); + TAILQ_INSERT_TAIL(&tmpq, bio, bio_act); + } + cothread_unlock(cotd, 0); + + while ((bio = TAILQ_FIRST(&tmpq)) != NULL) { + TAILQ_REMOVE(&tmpq, bio, bio_act); devstat_end_transaction_buf(&sc->stats, bio->bio_buf); biodone(bio); } - cothread_unlock(sc->cotd, 0); } /* diff --git a/sys/dev/virtual/net/if_vke.c b/sys/dev/virtual/net/if_vke.c index 1c24c3ed43..8d4336d51b 100644 --- a/sys/dev/virtual/net/if_vke.c +++ b/sys/dev/virtual/net/if_vke.c @@ -155,8 +155,9 @@ vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m) { fifo_t fifo = sc->sc_txfifo_done; - if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) - return (-1); + while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) { + usleep(20000); + } fifo->array[NETFIFOINDEX(fifo->windex)] = m; cpu_sfence(); @@ -184,21 +185,18 @@ vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm) } /* - * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo. Wake up the - * cothread via cothread_signal(). + * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo. */ static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m) { fifo_t fifo = sc->sc_txfifo; - cothread_t cotd = sc->cotd_tx; if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) return (-1); fifo->array[NETFIFOINDEX(fifo->windex)] = m; cpu_sfence(); - cothread_signal(cotd); ++fifo->windex; return (0); @@ -225,6 +223,16 @@ vke_txfifo_dequeue(struct vke_softc *sc) return (m); } +static int +vke_txfifo_empty(struct vke_softc *sc) +{ + fifo_t fifo = sc->sc_txfifo; + + if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) + return (1); + return(0); +} + /* * vke_rxfifo_dequeue() - Return next mbuf on the receice fifo if one * exists replacing it with newm which should point to a newly allocated @@ -272,7 +280,6 @@ vke_init(void *xsc) ASSERT_SERIALIZED(ifp->if_serializer); - vke_stop(sc); ifp->if_flags |= IFF_RUNNING; @@ -311,6 +318,13 @@ vke_init(void *xsc) } +/* + * Called from kernel. 
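+ * (the interface's if_start routine; also invoked via ifp->if_start()
+ * from vke_tx_intr() below)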
+ * + * NOTE: We can't make any kernel callbacks while holding cothread lock + * because the cothread lock is not governed by the kernel scheduler + * (so mplock, tokens, etc will not bbe released). + */ static void vke_start(struct ifnet *ifp) { @@ -324,25 +338,24 @@ vke_start(struct ifnet *ifp) if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING) return; - cothread_lock(cotd, 0); count = 0; - while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) { if (vke_txfifo_enqueue(sc, m) != -1) { if (count++ == VKE_CHUNK) { + cothread_lock(cotd, 0); cothread_signal(cotd); + cothread_unlock(cotd, 0); count = 0; } } else { m_freem(m); } } - if (count) { + cothread_lock(cotd, 0); cothread_signal(cotd); + cothread_unlock(cotd, 0); } - - cothread_unlock(cotd, 0); } static int @@ -480,6 +493,7 @@ vke_rx_intr(cothread_t cotd) ifnet_deserialize_all(ifp); return; } + cothread_unlock(cotd, 0); while ((m = vke_rxfifo_sniff(sc)) != NULL) { nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); @@ -487,7 +501,9 @@ vke_rx_intr(cothread_t cotd) vke_rxfifo_dequeue(sc, nm); ifp->if_input(ifp, m); if (count++ == VKE_CHUNK) { + cothread_lock(cotd, 0); cothread_signal(cotd); + cothread_unlock(cotd, 0); count = 0; } } else { @@ -495,10 +511,11 @@ vke_rx_intr(cothread_t cotd) } } - if (count) + if (count) { + cothread_lock(cotd, 0); cothread_signal(cotd); - - cothread_unlock(cotd, 0); + cothread_unlock(cotd, 0); + } ifnet_deserialize_all(ifp); } @@ -515,22 +532,23 @@ vke_tx_intr(cothread_t cotd) ifnet_serialize_all(ifp); cothread_lock(cotd, 0); - if (sc->cotd_tx_exit != VKE_COTD_RUN) { cothread_unlock(cotd, 0); ifnet_deserialize_all(ifp); return; } + cothread_unlock(cotd, 0); - if ((ifp->if_flags & IFF_RUNNING) == 0) - ifp->if_start(ifp); - - /* Free TX mbufs that have been processed */ + /* + * Free TX mbufs that have been processed before starting new + * ones going to be pipeline friendly. + */ while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) { m_freem(m); } - cothread_unlock(cotd, 0); + if ((ifp->if_flags & IFF_RUNNING) == 0) + ifp->if_start(ifp); ifnet_deserialize_all(ifp); } @@ -544,10 +562,11 @@ vke_rx_thread(cothread_t cotd) struct mbuf *m; struct vke_softc *sc = cotd->arg; struct ifnet *ifp = &sc->arpcom.ac_if; - int count; fifo_t fifo = sc->sc_rxfifo; fd_set fdset; struct timeval tv; + int count; + int n; /* Select timeout cannot be infinite since we need to check for * the exit flag sc->cotd_rx_exit. @@ -556,58 +575,52 @@ vke_rx_thread(cothread_t cotd) tv.tv_usec = 500000; FD_ZERO(&fdset); + count = 0; - cothread_lock(cotd, 1); + while (sc->cotd_rx_exit == VKE_COTD_RUN) { + /* + * Wait for the RX FIFO to be loaded with + * empty mbufs. 
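+			 * (vke_rx_intr() on the kernel side replenishes
+			 * consumed slots with freshly allocated clusters
+			 * via vke_rxfifo_dequeue().)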
+ */ + if (NETFIFOINDEX(fifo->windex + 1) == + NETFIFOINDEX(fifo->rindex)) { + usleep(20000); + continue; + } - for (;;) { - int n; - count = 0; - while (sc->cotd_rx_exit == VKE_COTD_RUN) { - /* Wait for the RX FIFO to drain */ - while (NETFIFOINDEX(fifo->windex + 1) == - NETFIFOINDEX(fifo->rindex)) { - usleep(20000); + /* + * Load data into the rx fifo + */ + m = fifo->array[NETFIFOINDEX(fifo->windex)]; + if (m == NULL) + continue; + n = read(sc->sc_fd, mtod(m, void *), MCLBYTES); + if (n > 0) { + ifp->if_ipackets++; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = m->m_len = n; + cpu_sfence(); + ++fifo->windex; + if (count++ == VKE_CHUNK) { + cothread_intr(cotd); + count = 0; } - - if ((m = fifo->array[NETFIFOINDEX(fifo->windex)]) != - NULL) { - cothread_unlock(cotd, 1); - n = read(sc->sc_fd, mtod(m, void *), MCLBYTES); - cothread_lock(cotd, 1); - if (n <= 0) - break; - ifp->if_ipackets++; - m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.len = m->m_len = n; - ++fifo->windex; - cpu_sfence(); - if (count++ == VKE_CHUNK) { - cothread_intr(cotd); - count = 0; - } + } else { + if (count) { + cothread_intr(cotd); + count = 0; } - } + FD_SET(sc->sc_fd, &fdset); - if (count) { - cothread_intr(cotd); + if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) { + kprintf(VKE_DEVNAME "%d: select failed for " + "TAP device\n", sc->sc_unit); + usleep(1000000); + } } - - if (sc->cotd_rx_exit != VKE_COTD_RUN) - break; - - cothread_unlock(cotd, 1); - - /* Set up data for select() call */ - FD_SET(sc->sc_fd, &fdset); - - if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) - kprintf(VKE_DEVNAME "%d: select failed for TAP device\n", sc->sc_unit); - - cothread_lock(cotd, 1); } - + cpu_sfence(); sc->cotd_rx_exit = VKE_COTD_DEAD; - cothread_unlock(cotd, 1); } /* @@ -621,40 +634,41 @@ vke_tx_thread(cothread_t cotd) struct ifnet *ifp = &sc->arpcom.ac_if; int count = 0; - cothread_lock(cotd, 1); - while (sc->cotd_tx_exit == VKE_COTD_RUN) { - /* Write outgoing packets to the TAP interface */ - while ((m = vke_txfifo_dequeue(sc)) != NULL) { + /* + * Write outgoing packets to the TAP interface + */ + m = vke_txfifo_dequeue(sc); + if (m) { if (m->m_pkthdr.len <= MCLBYTES) { m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf); sc->sc_txbuf_len = m->m_pkthdr.len; - cothread_unlock(cotd, 1); - if (write(sc->sc_fd, sc->sc_txbuf, sc->sc_txbuf_len) < 0) { - cothread_lock(cotd, 1); + if (write(sc->sc_fd, sc->sc_txbuf, + sc->sc_txbuf_len) < 0) { ifp->if_oerrors++; } else { - cothread_lock(cotd, 1); - vke_txfifo_done_enqueue(sc, m); ifp->if_opackets++; - if (count++ == VKE_CHUNK) { - cothread_intr(cotd); - count = 0; - } } } + if (count++ == VKE_CHUNK) { + cothread_intr(cotd); + count = 0; + } + vke_txfifo_done_enqueue(sc, m); + } else { + if (count) { + cothread_intr(cotd); + count = 0; + } + cothread_lock(cotd, 1); + if (vke_txfifo_empty(sc)) + cothread_wait(cotd); + cothread_unlock(cotd, 1); } - - if (count) { - cothread_intr(cotd); - } - - cothread_wait(cotd); /* interlocks cothread lock */ } - /* NOT REACHED */ + cpu_sfence(); sc->cotd_tx_exit = VKE_COTD_DEAD; - cothread_unlock(cotd, 1); } static int diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index a63bcc7127..c0a46dde97 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -720,8 +720,7 @@ ithread_fast_handler(struct intrframe *frame) /* * Interrupt threads run this as their main loop. * - * The handler begins execution outside a critical section and with the BGL - * held. + * The handler begins execution outside a critical section and no MP lock. 
* * The i_running state starts at 0. When an interrupt occurs, the hardware * interrupt is disabled and sched_ithd() The HW interrupt remains disabled diff --git a/sys/platform/vkernel/platform/cothread.c b/sys/platform/vkernel/platform/cothread.c index c432de5a11..b74b8eeecc 100644 --- a/sys/platform/vkernel/platform/cothread.c +++ b/sys/platform/vkernel/platform/cothread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * Copyright (c) 2008-2010 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -150,17 +150,17 @@ cothread_intr(cothread_t cotd) /* * Called by the vkernel to wakeup a cothread. + * The cothread must be locked. */ void cothread_signal(cothread_t cotd) { - crit_enter(); pthread_cond_signal(&cotd->cond); - crit_exit(); } /* * Called by the cothread to wait for the vkernel to call cothread_signal(). + * The cothread must be locked. */ void cothread_wait(cothread_t cotd) @@ -170,6 +170,13 @@ cothread_wait(cothread_t cotd) /* * Typically called by kernel thread or cothread + * + * These must be a matched pair. We will acquire a critical + * section in cothread_lock() and release it in cothread_unlock(). + * + * We do this to simplify cothread operation to prevent an + * interrupt (e.g. vkd_io_intr()) from preempting a vkd_strategy() + * call and creating a recursion in the pthread. */ void cothread_lock(cothread_t cotd, int is_cotd) @@ -177,9 +184,8 @@ cothread_lock(cothread_t cotd, int is_cotd) if (is_cotd) { pthread_mutex_lock(&cotd->mutex); } else { - crit_enter(); + crit_enter_id("cothread"); pthread_mutex_lock(&cotd->mutex); - crit_exit(); } } @@ -189,9 +195,8 @@ cothread_unlock(cothread_t cotd, int is_cotd) if (is_cotd) { pthread_mutex_unlock(&cotd->mutex); } else { - crit_enter(); pthread_mutex_unlock(&cotd->mutex); - crit_exit(); + crit_exit_id("cothread"); } } diff --git a/sys/platform/vkernel64/platform/cothread.c b/sys/platform/vkernel64/platform/cothread.c index dc81faaa78..58ea26d66d 100644 --- a/sys/platform/vkernel64/platform/cothread.c +++ b/sys/platform/vkernel64/platform/cothread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * Copyright (c) 2008-2010 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -150,17 +150,17 @@ cothread_intr(cothread_t cotd) /* * Called by the vkernel to wakeup a cothread. + * The cothread must be locked. */ void cothread_signal(cothread_t cotd) { - crit_enter(); pthread_cond_signal(&cotd->cond); - crit_exit(); } /* * Called by the cothread to wait for the vkernel to call cothread_signal(). + * The cothread must be locked. */ void cothread_wait(cothread_t cotd) @@ -170,6 +170,13 @@ cothread_wait(cothread_t cotd) /* * Typically called by kernel thread or cothread + * + * These must be a matched pair. We will acquire a critical + * section in cothread_lock() and release it in cothread_unlock(). + * + * We do this to simplify cothread operation to prevent an + * interrupt (e.g. vkd_io_intr()) from preempting a vkd_strategy() + * call and creating a recursion in the pthread. 
*/ void cothread_lock(cothread_t cotd, int is_cotd) @@ -177,9 +184,8 @@ cothread_lock(cothread_t cotd, int is_cotd) if (is_cotd) { pthread_mutex_lock(&cotd->mutex); } else { - crit_enter(); + crit_enter_id("cothread"); pthread_mutex_lock(&cotd->mutex); - crit_exit(); } } @@ -189,8 +195,7 @@ cothread_unlock(cothread_t cotd, int is_cotd) if (is_cotd) { pthread_mutex_unlock(&cotd->mutex); } else { - crit_enter(); pthread_mutex_unlock(&cotd->mutex); - crit_exit(); + crit_exit_id("cothread"); } } -- 2.41.0
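
Below is a minimal, self-contained sketch of the "drain under the lock,
deliver after unlocking" pattern that vkd_io_intr() adopts above.  It is an
illustration only, not part of the patch: the names (comp_intr, struct
completion, the done callback) are hypothetical, and a plain pthread mutex
stands in for the cothread lock.

	#include <pthread.h>
	#include <sys/queue.h>

	struct completion {
		TAILQ_ENTRY(completion) entry;
		void (*done)(struct completion *);  /* kernel-side callback */
	};

	TAILQ_HEAD(comp_list, completion);

	static struct comp_list donq = TAILQ_HEAD_INITIALIZER(donq);
	static pthread_mutex_t donq_mtx = PTHREAD_MUTEX_INITIALIZER;

	/*
	 * Kernel-side interrupt handler.  Hold the pthread mutex only long
	 * enough to move completed entries onto a private list, then run
	 * the callbacks with no pthread lock held.
	 */
	static void
	comp_intr(void)
	{
		struct comp_list tmpq;
		struct completion *c;

		TAILQ_INIT(&tmpq);

		pthread_mutex_lock(&donq_mtx);
		while ((c = TAILQ_FIRST(&donq)) != NULL) {
			TAILQ_REMOVE(&donq, c, entry);
			TAILQ_INSERT_TAIL(&tmpq, c, entry);
		}
		pthread_mutex_unlock(&donq_mtx);

		/* Callbacks may block or reschedule; no mutex is held here. */
		while ((c = TAILQ_FIRST(&tmpq)) != NULL) {
			TAILQ_REMOVE(&tmpq, c, entry);
			c->done(c);
		}
	}

The split matters because a pthread mutex is invisible to the vkernel
scheduler: if a kernel callback such as biodone() blocks while the mutex is
held, the cothread spinning on the other cpu can never acquire it and the
two sides deadlock.  Draining onto a private list keeps the mutex hold time
bounded and lets the callbacks block freely, which is also what allows the
network path to pipeline without stall conditions.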