From: Matthew Dillon
Date: Fri, 29 Jul 2011 08:25:46 +0000 (-0700)
Subject: kernel - Fix rare IPIQ freezes
X-Git-Tag: v2.12.0~269
X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/da0b0e8b56caa72ea92647df9f1a7c5663384fe2?hp=aa56bc86de90a38efb257081b2227934e132b83b

kernel - Fix rare IPIQ freezes

* Ensure that an IPI interrupt is sent when waiting for an IPIQ to drain.
  The IPIQ can be pushed up by passive IPIs and not necessarily have a
  signal pending on the target cpu, so we have to put the check in our
  drain loop rather than outside the loop.

* Add a cpu_pause() to reduce power use for the IPIQ drain case.

* Normalize the use of gd_intr_nesting_level and clean up the code
  syntax for the ipiq_optimized case.

* Remove the previous temporary IPI interrupt signaling fix, it was
  incomplete.

* Fix a missing crit_exit() in the ENOENT case for
  lwkt_send_ipiq3_nowait().

* Track cpu's which are in the middle of ipiq processing and assert
  that a cpu is not in an IPIQ processing loop when switching between
  threads.

* Normalize the use of ip->ip_npoll in the IPIQ code.  This field is
  used to avoid unnecessary IPI interrupts.
---

diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c
index 34e206b6e1..67433d9e8e 100644
--- a/sys/kern/lwkt_ipiq.c
+++ b/sys/kern/lwkt_ipiq.c
@@ -191,17 +191,17 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
         unsigned long rflags = read_rflags();
 #endif
 
-        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
-            logipiq(cpu_send, func, arg1, arg2, gd, target);
-            cpu_send_ipiq(target->gd_cpuid);
-        }
         cpu_enable_intr();
         ++ipiq_fifofull;
-        cpu_send_ipiq(target->gd_cpuid);
         DEBUG_PUSH_INFO("send_ipiq3");
         while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
+            if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
+                logipiq(cpu_send, func, arg1, arg2, gd, target);
+                cpu_send_ipiq(target->gd_cpuid);
+            }
             KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
             lwkt_process_ipiq();
+            cpu_pause();
         }
         DEBUG_POP_INFO();
 #if defined(__i386__)
@@ -220,25 +220,20 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
     ip->ip_arg2[windex] = arg2;
     cpu_sfence();
     ++ip->ip_windex;
-    --gd->gd_intr_nesting_level;
 
     /*
      * signal the target cpu that there is work pending.
      */
-    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
+    if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
         logipiq(cpu_send, func, arg1, arg2, gd, target);
         cpu_send_ipiq(target->gd_cpuid);
     } else {
-        if (ipiq_optimized == 0) {
-            logipiq(cpu_send, func, arg1, arg2, gd, target);
-            cpu_send_ipiq(target->gd_cpuid);
-        } else {
-            ++ipiq_avoided;
-        }
+        ++ipiq_avoided;
     }
+    --gd->gd_intr_nesting_level;
     crit_exit();
-    logipiq(send_end, func, arg1, arg2, gd, target);
+
     return(ip->ip_windex);
 }
 
@@ -263,8 +258,8 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
 
     KKASSERT(target != gd);
     crit_enter();
-    logipiq(send_pasv, func, arg1, arg2, gd, target);
     ++gd->gd_intr_nesting_level;
+    logipiq(send_pasv, func, arg1, arg2, gd, target);
 #ifdef INVARIANTS
     if (gd->gd_intr_nesting_level > 20)
         panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
@@ -285,17 +280,17 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
         unsigned long rflags = read_rflags();
 #endif
 
-        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
-            logipiq(cpu_send, func, arg1, arg2, gd, target);
-            cpu_send_ipiq(target->gd_cpuid);
-        }
         cpu_enable_intr();
         ++ipiq_fifofull;
-        cpu_send_ipiq(target->gd_cpuid);
         DEBUG_PUSH_INFO("send_ipiq3_passive");
         while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
+            if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
+                logipiq(cpu_send, func, arg1, arg2, gd, target);
+                cpu_send_ipiq(target->gd_cpuid);
+            }
             KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
             lwkt_process_ipiq();
+            cpu_pause();
         }
         DEBUG_POP_INFO();
 #if defined(__i386__)
@@ -321,8 +316,8 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
      * polls (typically on the next tick).
      */
     crit_exit();
-    logipiq(send_end, func, arg1, arg2, gd, target);
+
     return(ip->ip_windex);
 }
 
@@ -347,11 +342,15 @@ lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
         logipiq(send_end, func, arg1, arg2, gd, target);
         return(0);
     }
 
+    crit_enter();
+    ++gd->gd_intr_nesting_level;
     ++ipiq_count;
     ip = &gd->gd_ipiq[target->gd_cpuid];
     if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
         logipiq(send_fail, func, arg1, arg2, gd, target);
+        --gd->gd_intr_nesting_level;
+        crit_exit();
         return(ENOENT);
     }
     windex = ip->ip_windex & MAXCPUFIFO_MASK;
@@ -364,17 +363,14 @@ lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
     /*
      * This isn't a passive IPI, we still have to signal the target cpu.
      */
-    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
+    if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
         logipiq(cpu_send, func, arg1, arg2, gd, target);
         cpu_send_ipiq(target->gd_cpuid);
     } else {
-        if (ipiq_optimized == 0) {
-            logipiq(cpu_send, func, arg1, arg2, gd, target);
-            cpu_send_ipiq(target->gd_cpuid);
-        } else {
-            ++ipiq_avoided;
-        }
+        ++ipiq_avoided;
     }
+    --gd->gd_intr_nesting_level;
+    crit_exit();
 
     logipiq(send_end, func, arg1, arg2, gd, target);
     return(0);
@@ -493,6 +489,7 @@ lwkt_process_ipiq(void)
     lwkt_ipiq_t ip;
     int n;
 
+    ++gd->gd_processing_ipiq;
 again:
     for (n = 0; n < ncpus; ++n) {
         if (n != gd->gd_cpuid) {
@@ -504,12 +501,12 @@ again:
             }
         }
     }
-    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
-        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
-            if (gd->gd_curthread->td_cscount == 0)
-                goto again;
-        }
+    if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
+        if (gd->gd_curthread->td_cscount == 0)
+            goto again;
+        /* need_ipiq(); do not reflag */
     }
+    --gd->gd_processing_ipiq;
 }
 
 void
@@ -535,6 +532,7 @@ again:
         if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
             if (gd->gd_curthread->td_cscount == 0)
                 goto again;
+            /* need_ipiq(); do not reflag */
         }
     }
 }
@@ -653,13 +651,18 @@ lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
     --mygd->gd_intr_nesting_level;
 
     /*
-     * Return non-zero if there are more IPI messages pending on this
-     * ipiq.  ip_npoll is left set as long as possible to reduce the
-     * number of IPIs queued by the originating cpu, but must be cleared
-     * *BEFORE* checking windex.
+     * If the queue is empty release ip_npoll to enable the other cpu to
+     * send us an IPI interrupt again.
+     *
+     * Return non-zero if there is still more in the queue.  Note that we
+     * must re-check the indexes after potentially releasing ip_npoll.  The
+     * caller must loop or otherwise ensure that a loop will occur prior to
+     * blocking.
      */
-    atomic_poll_release_int(&ip->ip_npoll);
-    return(wi != ip->ip_windex);
+    if (ip->ip_rindex == ip->ip_windex);
+        atomic_poll_release_int(&ip->ip_npoll);
+    cpu_lfence();
+    return (ip->ip_rindex != ip->ip_windex);
 }
 
 static void
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index f77bfa5104..4fbf54d0c7 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -518,6 +518,8 @@ lwkt_switch(void)
     int oseq;
     int fatal_count;
 
+    KKASSERT(gd->gd_processing_ipiq == 0);
+
     /*
      * Switching from within a 'fast' (non thread switched) interrupt or IPI
      * is illegal.  However, we may have to do it anyway if we hit a fatal
@@ -1083,6 +1085,7 @@ lwkt_preempt(thread_t ntd, int critcount)
         need_lwkt_resched();
         return;
     }
+    KKASSERT(gd->gd_processing_ipiq == 0);
 
     /*
      * Since we are able to preempt the current thread, there is no need to
diff --git a/sys/platform/vkernel64/x86_64/cpu_regs.c b/sys/platform/vkernel64/x86_64/cpu_regs.c
index c396255267..6007b68c6b 100644
--- a/sys/platform/vkernel64/x86_64/cpu_regs.c
+++ b/sys/platform/vkernel64/x86_64/cpu_regs.c
@@ -712,6 +712,7 @@ cpu_idle(void)
         crit_exit();
         KKASSERT(td->td_critcount == 0);
         cpu_enable_intr();
+
         for (;;) {
                 /*
                  * See if there are any LWKTs ready to go.
@@ -720,7 +721,9 @@ cpu_idle(void)
 
                 /*
                  * The idle loop halts only if no threads are scheduleable
-                 * and no signals have occured.
+                 * and no signals have occured.  If we race a signal
+                 * RQF_WAKEUP and other gd_reqflags will cause umtx_sleep()
+                 * to return immediately.
                  */
                 if (cpu_idle_hlt &&
                     (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
@@ -732,6 +735,7 @@ cpu_idle(void)
 #endif
                         reqflags = gd->mi.gd_reqflags &
                                    ~RQF_IDLECHECK_WK_MASK;
+                        KKASSERT(gd->mi.gd_processing_ipiq == 0);
                         umtx_sleep(&gd->mi.gd_reqflags, reqflags, 1000000);
 #ifdef DEBUGIDLE
diff --git a/sys/platform/vkernel64/x86_64/mp.c b/sys/platform/vkernel64/x86_64/mp.c
index 4073cc31e8..61d2ba32bb 100644
--- a/sys/platform/vkernel64/x86_64/mp.c
+++ b/sys/platform/vkernel64/x86_64/mp.c
@@ -192,9 +192,10 @@ mp_announce(void)
 void
 cpu_send_ipiq(int dcpu)
 {
-        if (CPUMASK(dcpu) & smp_active_mask)
+        if (CPUMASK(dcpu) & smp_active_mask) {
                 if (pthread_kill(ap_tids[dcpu], SIGUSR1) != 0)
                         panic("pthread_kill failed in cpu_send_ipiq");
+        }
 #if 0
         panic("XXX cpu_send_ipiq()");
 #endif
diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h
index 8845f29414..b506f3b1a1 100644
--- a/sys/sys/globaldata.h
+++ b/sys/sys/globaldata.h
@@ -159,7 +159,7 @@ struct globaldata {
         sysid_t         gd_sysid_alloc;         /* allocate unique sysid */
 
         struct tslpque  *gd_tsleep_hash;        /* tsleep/wakeup support */
-        void            *gd_unused08;
+        long            gd_processing_ipiq;
         int             gd_spinlocks_wr;        /* Exclusive spinlocks held */
         struct systimer *gd_systimer_inprog;    /* in-progress systimer */
         int             gd_timer_running;
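
The sketches below are editorial illustrations of the patterns this commit relies
on; they are not part of the patch.  This first one shows the drain-loop shape
behind the first two bullets: the IPI is (re)sent from inside the wait loop
whenever the ip_npoll-style token can be acquired, because a queue filled by
passive sends may have no interrupt pending on the target, and a pause keeps the
spin cheap.  All names (my_ipiq_t, my_send_hw_ipi(), and so on) are hypothetical
stand-ins, not DragonFly APIs.

    #include <stdatomic.h>
    #include <stdint.h>

    #define MY_FIFO_SIZE    256             /* power of two, like MAXCPUFIFO */

    typedef struct my_ipiq {
        uint32_t    windex;                 /* producer (sender) index */
        uint32_t    rindex;                 /* consumer (target) index */
        atomic_int  npoll;                  /* 0 = target may need an IPI */
    } my_ipiq_t;

    /* stand-ins for cpu_send_ipiq(), lwkt_process_ipiq() and cpu_pause() */
    static void my_send_hw_ipi(int cpu) { (void)cpu; /* platform IPI here */ }
    static void my_process_local_ipiq(void) { /* drain our own queues here */ }
    static void my_cpu_pause(void) { /* e.g. the x86 "pause" instruction */ }

    /* returns nonzero if we took the token, i.e. an IPI must be sent */
    static int
    my_poll_acquire(atomic_int *p)
    {
        int expected = 0;
        return atomic_compare_exchange_strong(p, &expected, 1);
    }

    static void
    my_wait_for_room(my_ipiq_t *ip, int target_cpu)
    {
        /*
         * The queue may have filled up via passive sends, so the target
         * might not have an interrupt pending.  Check and (re)signal on
         * every iteration rather than once before the loop.
         */
        while (ip->windex - ip->rindex > MY_FIFO_SIZE / 4) {
            if (my_poll_acquire(&ip->npoll))
                my_send_hw_ipi(target_cpu);
            my_process_local_ipiq();    /* keep draining our own queues */
            my_cpu_pause();             /* reduce power while spinning */
        }
    }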
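
Also illustrative only, reusing my_ipiq_t from the previous sketch: the consumer
side of the same hypothetical queue, mirroring the new comment in
lwkt_process_ipiq_core().  The token is released only once the queue looks empty,
and the indexes are re-checked after the release so a message that raced in is
still seen by the caller's loop rather than lost while the sender believes no IPI
is needed.

    /* processes one hypothetical queue; returns nonzero if work remains */
    static int
    my_process_queue(my_ipiq_t *ip)
    {
        while (ip->rindex != ip->windex) {
            /* ... run the function queued at rindex ... */
            ++ip->rindex;
        }

        /*
         * Queue looks empty: hand the token back so the sender will raise
         * a hardware IPI for the next message.  (In the real routine
         * processing can stop early, so the emptiness test matters there.)
         */
        if (ip->rindex == ip->windex)
            atomic_store(&ip->npoll, 0);

        /*
         * Re-check after the release, with a fence standing in for
         * cpu_lfence(); a nonzero return tells the caller to loop again
         * instead of blocking.
         */
        atomic_thread_fence(memory_order_seq_cst);
        return (ip->rindex != ip->windex);
    }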
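
Finally, a small sketch of the bookkeeping behind the new gd_processing_ipiq
field: a per-cpu counter bumped around IPIQ processing so the scheduler can
assert that it is never entered from inside the processing loop.  The file-scope
variable below is a placeholder for the per-cpu globaldata field.

    #include <assert.h>

    static long my_processing_ipiq;     /* stands in for gd->gd_processing_ipiq */

    static void
    my_process_all_ipiqs(void)
    {
        ++my_processing_ipiq;
        /* ... drain every per-cpu queue, e.g. via my_process_queue() ... */
        --my_processing_ipiq;
    }

    static void
    my_thread_switch(void)
    {
        /* switching threads from inside IPIQ processing would be a bug */
        assert(my_processing_ipiq == 0);
        /* ... normal switch path ... */
    }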