kernel - Refactor smp collision statistics
author     Matthew Dillon <dillon@apollo.backplane.com>
Thu, 5 Oct 2017 04:46:57 +0000 (21:46 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
Mon, 16 Oct 2017 18:30:22 +0000 (11:30 -0700)
* Add an indefinite wait timing API (sys/indefinite.h,
  sys/indefinite2.h).  This interface uses the TSC and will
  record lock latencies to our pcpu stats in microseconds.
  The systat -pv 1 display shows this under smpcoll.

  Note that latencies generated by tokens, lockmgr, and mutex
  locks do not necessarily reflect actual lost cpu time, as the
  kernel will schedule other runnable threads, when available,
  while a thread is blocked on one of these locks.

* Formalize TSC operations more, supplying dedicated types
  (tsc_uclock_t and tsc_sclock_t); see the TSC sketch after
  this list.

* Reinstrument lockmgr, mutex, token, and spinlocks to use the new
  indefinite timing interface (usage sketch below).
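
  A minimal sketch of the pattern the reinstrumented contention
  loops follow (illustration only, not part of the diff below;
  try_acquire() is a hypothetical stand-in for the real per-lock
  attempt, everything else follows sys/indefinite2.h):

  #include <sys/indefinite.h>
  #include <sys/indefinite2.h>          /* inline indefinite_*() API */

  static void
  example_lock_contested(struct spinlock *spin, const char *ident)
  {
          thread_t td = curthread;

          /* 'S' = exclusive spinlock; now=0 defers the v_lock_name copy */
          indefinite_init(&td->td_indefinite, ident, 0, 'S');
          for (;;) {
                  if (try_acquire(spin))        /* hypothetical helper */
                          break;
                  /* heavy checks every 128 polls, warning after 1 sec */
                  if (indefinite_check(&td->td_indefinite))
                          break;
          }
          /* add measured latency, in microseconds, to pcpu v_lock_colls */
          indefinite_done(&td->td_indefinite);
  }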

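  For the new TSC types, a small sketch of how the signed variant is
  meant to be used for wrap-safe timeout checks, mirroring the lapic.c
  and pmap_inval.c hunks in this commit (poll_done() is a hypothetical
  placeholder for whatever condition is being waited on):

  static void
  example_tsc_stall_check(void)
  {
          tsc_uclock_t tsc = rdtsc();

          while (!poll_done()) {                /* hypothetical */
                  cpu_pause();
                  /* signed difference keeps the compare wrap-safe */
                  if ((tsc_sclock_t)(rdtsc() - (tsc + tsc_frequency)) > 0) {
                          kprintf("example: stalled for ~1 second\n");
                          tsc = rdtsc();        /* re-arm the timeout */
                  }
          }
  }
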
26 files changed:
sys/cpu/x86_64/include/cpufunc.h
sys/kern/kern_clock.c
sys/kern/kern_lock.c
sys/kern/kern_mutex.c
sys/kern/kern_spinlock.c
sys/kern/lwkt_thread.c
sys/kern/lwkt_token.c
sys/net/altq/altq_subr.c
sys/platform/pc64/apic/lapic.c
sys/platform/pc64/include/clock.h
sys/platform/pc64/isa/clock.c
sys/platform/pc64/x86_64/mp_machdep.c
sys/platform/pc64/x86_64/pmap_inval.c
sys/platform/pc64/x86_64/trap.c
sys/platform/vkernel64/include/clock.h
sys/platform/vkernel64/platform/init.c
sys/sys/buf2.h
sys/sys/indefinite.h [copied from sys/sys/microtime_pcpu.h with 65% similarity]
sys/sys/indefinite2.h [new file with mode: 0644]
sys/sys/lock.h
sys/sys/microtime_pcpu.h
sys/sys/mutex.h
sys/sys/mutex2.h
sys/sys/thread.h
sys/sys/time.h
sys/vfs/nfs/nfs_vfsops.c

index 1d95c11..a379bbc 100644 (file)
@@ -42,6 +42,7 @@
 
 #include <sys/cdefs.h>
 #include <sys/thread.h>
+#include <machine/clock.h>
 #include <machine/psl.h>
 #include <machine/smp.h>
 
@@ -549,20 +550,20 @@ rdpmc(u_int pmc)
 
 #define _RDTSC_SUPPORTED_
 
-static __inline u_int64_t
+static __inline tsc_uclock_t
 rdtsc(void)
 {
        u_int32_t low, high;
 
        __asm __volatile("rdtsc" : "=a" (low), "=d" (high));
-       return (low | ((u_int64_t)high << 32));
+       return (low | ((tsc_uclock_t)high << 32));
 }
 
 #ifdef _KERNEL
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 
-static __inline u_int64_t
+static __inline tsc_uclock_t
 rdtsc_ordered(void)
 {
        if (cpu_vendor_id == CPU_VENDOR_INTEL)
@@ -953,7 +954,7 @@ u_short     rfs(void);
 u_short        rgs(void);
 u_int64_t rdmsr(u_int msr);
 u_int64_t rdpmc(u_int pmc);
-u_int64_t rdtsc(void);
+tsc_uclock_t rdtsc(void);
 u_int  read_rflags(void);
 void   wbinvd(void);
 void   write_rflags(u_int rf);
index 22dc72a..5b5cc0c 100644 (file)
@@ -316,7 +316,7 @@ initclocks(void *dummy)
        initclocks_pcpu();
        clocks_running = 1;
        if (kpmap) {
-           kpmap->tsc_freq = (uint64_t)tsc_frequency;
+           kpmap->tsc_freq = tsc_frequency;
            kpmap->tick_freq = hz;
        }
 }
@@ -1672,7 +1672,7 @@ pps_event(struct pps_state *pps, sysclock_t count, int event)
  *
  * Returns -1 if the TSC is not supported.
  */
-int64_t
+tsc_uclock_t
 tsc_get_target(int ns)
 {
 #if defined(_RDTSC_SUPPORTED_)
index d512a86..f95acb3 100644 (file)
@@ -46,6 +46,7 @@
 #include <sys/spinlock.h>
 #include <sys/thread2.h>
 #include <sys/spinlock2.h>
+#include <sys/indefinite2.h>
 
 static void undo_upreq(struct lock *lkp);
 
@@ -63,6 +64,10 @@ SYSCTL_PROC(_kern, OID_AUTO, cancel_test, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 
 #endif
 
+int lock_test_mode;
+SYSCTL_INT(_debug, OID_AUTO, lock_test_mode, CTLFLAG_RW,
+          &lock_test_mode, 0, "");
+
 /*
  * Locking primitives implementation.
  * Locks provide shared/exclusive sychronization.
@@ -93,11 +98,13 @@ debuglockmgr(struct lock *lkp, u_int flags,
        int pflags;
        int wflags;
        int timo;
+       int info_init;
 #ifdef DEBUG_LOCKS
        int i;
 #endif
 
        error = 0;
+       info_init = 0;
 
        if (mycpu->gd_intr_nesting_level &&
            (flags & LK_NOWAIT) == 0 &&
@@ -198,11 +205,12 @@ again:
                                goto again;
                        }
 
-                       mycpu->gd_cnt.v_lock_name[0] = 'S';
-                       strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                               lkp->lk_wmesg,
-                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                       ++mycpu->gd_cnt.v_lock_colls;
+                       if (info_init == 0 &&
+                           (lkp->lk_flags & LK_NOCOLLSTATS) == 0) {
+                               indefinite_init(&td->td_indefinite,
+                                               lkp->lk_wmesg, 1, 'l');
+                               info_init = 1;
+                       }
 
                        error = tsleep(lkp, pflags | PINTERLOCKED,
                                       lkp->lk_wmesg, timo);
@@ -284,11 +292,12 @@ again:
                        goto again;
                }
 
-               mycpu->gd_cnt.v_lock_name[0] = 'X';
-               strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                       lkp->lk_wmesg,
-                       sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-               ++mycpu->gd_cnt.v_lock_colls;
+               if (info_init == 0 &&
+                   (lkp->lk_flags & LK_NOCOLLSTATS) == 0) {
+                       indefinite_init(&td->td_indefinite, lkp->lk_wmesg,
+                                       1, 'L');
+                       info_init = 1;
+               }
 
                error = tsleep(lkp, pflags | PINTERLOCKED,
                               lkp->lk_wmesg, timo);
@@ -298,6 +307,7 @@ again:
                        error = ENOLCK;
                        break;
                }
+               indefinite_check(&td->td_indefinite);
                goto again;
 
        case LK_DOWNGRADE:
@@ -436,6 +446,13 @@ again:
                        wflags |= (count - 1);
                }
 
+               if (info_init == 0 &&
+                   (lkp->lk_flags & LK_NOCOLLSTATS) == 0) {
+                       indefinite_init(&td->td_indefinite, lkp->lk_wmesg,
+                                       1, 'U');
+                       info_init = 1;
+               }
+
                if (atomic_cmpset_int(&lkp->lk_count, count, wflags)) {
                        COUNT(td, -1);
 
@@ -445,12 +462,6 @@ again:
                        if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1))
                                wakeup(lkp);
 
-                       mycpu->gd_cnt.v_lock_name[0] = 'U';
-                       strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                               lkp->lk_wmesg,
-                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                       ++mycpu->gd_cnt.v_lock_colls;
-
                        error = tsleep(lkp, pflags | PINTERLOCKED,
                                       lkp->lk_wmesg, timo);
                        if (error) {
@@ -475,6 +486,7 @@ again:
                        else
                                flags = LK_WAITUPGRADE; /* we own the bit */
                }
+               indefinite_check(&td->td_indefinite);
                goto again;
 
        case LK_WAITUPGRADE:
@@ -503,12 +515,6 @@ again:
                        timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
                        tsleep_interlock(lkp, pflags);
                        if (atomic_fetchadd_int(&lkp->lk_count, 0) == count) {
-                               mycpu->gd_cnt.v_lock_name[0] = 'U';
-                               strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                                       lkp->lk_wmesg,
-                                       sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                               ++mycpu->gd_cnt.v_lock_colls;
-
                                error = tsleep(lkp, pflags | PINTERLOCKED,
                                               lkp->lk_wmesg, timo);
                                if (error) {
@@ -523,6 +529,7 @@ again:
                        }
                        /* retry */
                }
+               indefinite_check(&td->td_indefinite);
                goto again;
 
        case LK_RELEASE:
@@ -673,6 +680,10 @@ again:
                    flags & LK_TYPE_MASK);
                /* NOTREACHED */
        }
+
+       if (info_init)
+               indefinite_done(&td->td_indefinite);
+
        return (error);
 }
 
@@ -903,12 +914,9 @@ sysctl_cancel_lock(SYSCTL_HANDLER_ARGS)
        if (req->newptr) {
                SYSCTL_XUNLOCK();
                lockmgr(&cancel_lk, LK_EXCLUSIVE);
-               kprintf("x");
                error = tsleep(&error, PCATCH, "canmas", hz * 5);
                lockmgr(&cancel_lk, LK_CANCEL_BEG);
-               kprintf("y");
                error = tsleep(&error, PCATCH, "canmas", hz * 5);
-               kprintf("z");
                lockmgr(&cancel_lk, LK_RELEASE);
                SYSCTL_XLOCK();
                SYSCTL_OUT(req, &error, sizeof(error));
index b7b247f..60cd51b 100644 (file)
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
+#include <sys/indefinite.h>
 #include <sys/thread.h>
 
 #include <machine/cpufunc.h>
 
 #include <sys/thread2.h>
 #include <sys/mutex2.h>
+#include <sys/indefinite2.h>
 
 static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
 static int mtx_chain_link_sh(mtx_t *mtx, u_int olock);
@@ -936,8 +938,14 @@ mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
 int
 mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 {
+       thread_t td = curthread;
        int error;
 
+       if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0) {
+               indefinite_init(&td->td_indefinite, mtx->mtx_ident, 1,
+                       ((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M'));
+       }
+
        /*
         * Sleep.  Handle false wakeups, interruptions, etc.
         * The link may also have been aborted.  The LINKED
@@ -949,20 +957,13 @@ mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                tsleep_interlock(link, 0);
                cpu_lfence();
                if (link->state & MTX_LINK_LINKED) {
-                       if (link->state & MTX_LINK_LINKED_SH)
-                               mycpu->gd_cnt.v_lock_name[0] = 'S';
-                       else
-                               mycpu->gd_cnt.v_lock_name[0] = 'X';
-                       strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                               mtx->mtx_ident,
-                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                       ++mycpu->gd_cnt.v_lock_colls;
-
                        error = tsleep(link, flags | PINTERLOCKED,
                                       mtx->mtx_ident, to);
                        if (error)
                                break;
                }
+               if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
+                       indefinite_check(&td->td_indefinite);
        }
 
        /*
@@ -1014,6 +1015,9 @@ mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
         */
        link->state = MTX_LINK_IDLE;
 
+       if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
+               indefinite_done(&td->td_indefinite);
+
        return error;
 }
 
index c009ac0..031cbb9 100644 (file)
@@ -62,6 +62,7 @@
 #include <machine/cpufunc.h>
 #include <machine/specialreg.h>
 #include <machine/clock.h>
+#include <sys/indefinite2.h>
 #include <sys/spinlock.h>
 #include <sys/spinlock2.h>
 #include <sys/ktr.h>
 
 struct spinlock pmap_spin = SPINLOCK_INITIALIZER(pmap_spin, "pmap_spin");
 
-struct indefinite_info {
-       sysclock_t      base;
-       int             secs;
-       const char      *ident;
-};
-
 /*
  * Kernal Trace
  */
@@ -109,9 +104,6 @@ SYSCTL_LONG(_debug, OID_AUTO, spinlocks_add_latency, CTLFLAG_RW,
 
 #endif
 
-static int spin_indefinite_check(struct spinlock *spin,
-                                 struct indefinite_info *info);
-
 /*
  * We contested due to another exclusive lock holder.  We lose.
  *
@@ -178,8 +170,7 @@ spin_trylock_contested(struct spinlock *spin)
 void
 _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
 {
-       struct indefinite_info info = { 0, 0, ident };
-       int i;
+       thread_t td = curthread;
 
        /*
         * WARNING! Caller has already incremented the lock.  We must
@@ -194,6 +185,7 @@ _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
                if (atomic_cmpset_int(&spin->counta, SPINLOCK_SHARED | 1, 1))
                        return;
        }
+       indefinite_init(&td->td_indefinite, ident, 0, 'S');
 
        /*
         * Transfer our exclusive request to the high bits and clear the
@@ -211,16 +203,9 @@ _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
        if (value & SPINLOCK_SHARED)
                atomic_clear_int(&spin->counta, SPINLOCK_SHARED);
 
-#ifdef DEBUG_LOCKS_LATENCY
-       long j;
-       for (j = spinlocks_add_latency; j > 0; --j)
-               cpu_ccfence();
-#endif
        /*
         * Spin until we can acquire a low-count of 1.
         */
-       i = 0;
-       /*logspin(beg, spin, 'w');*/
        for (;;) {
                /*
                 * If the low bits are zero, try to acquire the exclusive lock
@@ -243,20 +228,10 @@ _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
                                      (ovalue - SPINLOCK_EXCLWAIT) | 1)) {
                        break;
                }
-               if ((++i & 0x7F) == 0x7F) {
-                       mycpu->gd_cnt.v_lock_name[0] = 'X';
-                       strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                               ident,
-                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                       ++mycpu->gd_cnt.v_lock_colls;
-                       if (spin_indefinite_check(spin, &info))
-                               break;
-               }
-#ifdef _KERNEL_VIRTUAL
-               pthread_yield();
-#endif
+               if (indefinite_check(&td->td_indefinite))
+                       break;
        }
-       /*logspin(end, spin, 'w');*/
+       indefinite_done(&td->td_indefinite);
 }
 
 /*
@@ -269,8 +244,9 @@ _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
 void
 _spin_lock_shared_contested(struct spinlock *spin, const char *ident)
 {
-       struct indefinite_info info = { 0, 0, ident };
-       int i;
+       thread_t td = curthread;
+
+       indefinite_init(&td->td_indefinite, ident, 0, 's');
 
        /*
         * Undo the inline's increment.
@@ -283,8 +259,6 @@ _spin_lock_shared_contested(struct spinlock *spin, const char *ident)
                cpu_ccfence();
 #endif
 
-       /*logspin(beg, spin, 'w');*/
-       i = 0;
        for (;;) {
                /*
                 * Loop until we can acquire the shared spinlock.  Note that
@@ -315,56 +289,10 @@ _spin_lock_shared_contested(struct spinlock *spin, const char *ident)
                                              ovalue + 1))
                                break;
                }
-               if ((++i & 0x7F) == 0x7F) {
-                       mycpu->gd_cnt.v_lock_name[0] = 'S';
-                       strncpy(mycpu->gd_cnt.v_lock_name + 1,
-                               ident,
-                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
-                       ++mycpu->gd_cnt.v_lock_colls;
-                       if (spin_indefinite_check(spin, &info))
-                               break;
-               }
-#ifdef _KERNEL_VIRTUAL
-               pthread_yield();
-#endif
-       }
-       /*logspin(end, spin, 'w');*/
-}
-
-static
-int
-spin_indefinite_check(struct spinlock *spin, struct indefinite_info *info)
-{
-       sysclock_t count;
-
-       cpu_spinlock_contested();
-
-       count = sys_cputimer->count();
-       if (info->secs == 0) {
-               info->base = count;
-               ++info->secs;
-       } else if (count - info->base > sys_cputimer->freq) {
-               kprintf("spin_lock: %s(%p), indefinite wait (%d secs)!\n",
-                       info->ident, spin, info->secs);
-               info->base = count;
-               ++info->secs;
-               if (panicstr)
-                       return (TRUE);
-#if defined(INVARIANTS)
-               if (spin_lock_test_mode) {
-                       print_backtrace(-1);
-                       return (TRUE);
-               }
-#endif
-#if defined(INVARIANTS)
-               if (info->secs == 11)
-                       print_backtrace(-1);
-#endif
-               if (info->secs == 60)
-                       panic("spin_lock: %s(%p), indefinite wait!",
-                             info->ident, spin);
+               if (indefinite_check(&td->td_indefinite))
+                       break;
        }
-       return (FALSE);
+       indefinite_done(&td->td_indefinite);
 }
 
 /*
index d3a26f5..e6e4d62 100644 (file)
 #include <sys/lock.h>
 #include <sys/spinlock.h>
 #include <sys/ktr.h>
+#include <sys/indefinite.h>
 
 #include <sys/thread2.h>
 #include <sys/spinlock2.h>
+#include <sys/indefinite2.h>
 
 #include <sys/dsched.h>
 
@@ -701,7 +703,6 @@ lwkt_switch(void)
            {
                goto havethread;
            }
-           ++gd->gd_cnt.v_lock_colls;
            ++ntd->td_contended;        /* overflow ok */
 #ifdef LOOPMASK
            if (tsc_frequency && rdtsc() - tsc_base > tsc_frequency) {
@@ -735,7 +736,6 @@ lwkt_switch(void)
                    goto havethread;
            }
            ++ntd->td_contended;        /* overflow ok */
-           ++gd->gd_cnt.v_lock_colls;
        }
 
        /*
@@ -766,6 +766,12 @@ havethread:
     ++gd->gd_cnt.v_swtch;
     gd->gd_idle_repeat = 0;
 
+    /*
+     * If we were busy waiting record final disposition
+     */
+    if (ntd->td_indefinite.type)
+           indefinite_done(&ntd->td_indefinite);
+
 havethread_preempted:
     /*
      * If the new target does not need the MP lock and we are holding it,
index 8e35887..7083045 100644 (file)
 #include <machine/cpu.h>
 #include <sys/lock.h>
 #include <sys/spinlock.h>
+#include <sys/indefinite.h>
 
 #include <sys/thread2.h>
 #include <sys/spinlock2.h>
 #include <sys/mplock2.h>
+#include <sys/indefinite2.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -480,10 +482,13 @@ lwkt_getalltokens(thread_t td, int spinning)
                         */
                        KASSERT(tok->t_desc,
                                ("token %p is not initialized", tok));
-                       strncpy(td->td_gd->gd_cnt.v_lock_name,
-                               tok->t_desc,
-                               sizeof(td->td_gd->gd_cnt.v_lock_name) - 1);
 
+                       if (td->td_indefinite.type == 0) {
+                               indefinite_init(&td->td_indefinite,
+                                               tok->t_desc, 1, 't');
+                       } else {
+                               indefinite_check(&td->td_indefinite);
+                       }
                        if (lwkt_sched_debug > 0) {
                                --lwkt_sched_debug;
                                kprintf("toka %p %s %s\n",
@@ -592,6 +597,12 @@ _lwkt_getalltokens_sorted(thread_t td)
                         * Otherwise we failed to acquire all the tokens.
                         * Release whatever we did get.
                         */
+                       if (td->td_indefinite.type == 0) {
+                               indefinite_init(&td->td_indefinite,
+                                               tok->t_desc, 1, 't');
+                       } else {
+                               indefinite_check(&td->td_indefinite);
+                       }
                        if (lwkt_sched_debug > 0) {
                                --lwkt_sched_debug;
                                kprintf("tokb %p %s %s\n",
index 00381c0..0210a91 100644 (file)
@@ -867,7 +867,7 @@ init_machclk(void)
         */
 #ifdef _RDTSC_SUPPORTED_
        if (tsc_present)
-               machclk_freq = (uint64_t)tsc_frequency;
+               machclk_freq = tsc_frequency;
 #endif
 
        /*
index 7bc552e..5de6239 100644 (file)
@@ -665,7 +665,8 @@ apic_ipi(int dest_type, int vector, int delivery_mode)
                tsc = rdtsc();
                while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
                        cpu_pause();
-                       if ((int64_t)(rdtsc() - (tsc + tsc_frequency)) > 0) {
+                       if ((tsc_sclock_t)(rdtsc() -
+                                          (tsc + tsc_frequency)) > 0) {
                                kprintf("apic_ipi stall cpu %d (sing)\n",
                                        mycpuid);
                                tsc = rdtsc();
@@ -698,7 +699,8 @@ single_apic_ipi(int cpu, int vector, int delivery_mode)
                tsc = rdtsc();
                while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
                        cpu_pause();
-                       if ((int64_t)(rdtsc() - (tsc + tsc_frequency)) > 0) {
+                       if ((tsc_sclock_t)(rdtsc() -
+                                          (tsc + tsc_frequency)) > 0) {
                                kprintf("single_apic_ipi stall cpu %d (sing)\n",
                                        mycpuid);
                                tsc = rdtsc();
index fda63d7..4011bc7 100644 (file)
@@ -25,6 +25,9 @@ typedef struct TOTALDELAY {
        sysclock_t      last_clock;
 } TOTALDELAY;
 
+typedef uint64_t tsc_uclock_t;
+typedef int64_t        tsc_sclock_t;
+
 /*
  * i386 to clock driver interface.
  * XXX large parts of the driver and its interface are misplaced.
@@ -36,9 +39,10 @@ extern int   timer0_max_count;
 extern int     tsc_present;
 extern int     tsc_invariant;
 extern int     tsc_mpsync;
-extern int64_t tsc_frequency;
 extern int     tsc_is_broken;
 extern int     wall_cmos_clock;
+extern tsc_uclock_t tsc_frequency;
+extern tsc_uclock_t tsc_oneus_approx;  /* do not use for fine calc, min 1 */
 
 /*
  * Driver to clock driver interface.
index ee45f6f..496f09c 100644 (file)
@@ -106,10 +106,12 @@ int       disable_rtc_set;        /* disable resettodr() if != 0 */
 int    tsc_present;
 int    tsc_invariant;
 int    tsc_mpsync;
-int64_t        tsc_frequency;
 int    tsc_is_broken;
 int    wall_cmos_clock;        /* wall CMOS clock assumed if != 0 */
 int    timer0_running;
+tsc_uclock_t tsc_frequency;
+tsc_uclock_t tsc_oneus_approx; /* always at least 1, approx only */
+
 enum tstate { RELEASED, ACQUIRED };
 enum tstate timer0_state;
 enum tstate timer1_state;
@@ -557,7 +559,7 @@ readrtc(int port)
 static u_int
 calibrate_clocks(void)
 {
-       u_int64_t old_tsc;
+       tsc_uclock_t old_tsc;
        u_int tot_count;
        sysclock_t count, prev_count;
        int sec, start_sec, timeout;
@@ -633,6 +635,7 @@ calibrate_clocks(void)
                            (intmax_t)tsc_frequency);
                }
        }
+       tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
 
        kprintf("i8254 clock: %u Hz\n", tot_count);
        return (tot_count);
@@ -849,15 +852,15 @@ startrtclock(void)
                cputimer_set_frequency(&i8254_cputimer, freq);
        } else {
                if (bootverbose)
-                       kprintf(
-                   "%d Hz differs from default of %d Hz by more than 1%%\n",
-                              freq, i8254_cputimer.freq);
+                       kprintf("%d Hz differs from default of %d Hz "
+                               "by more than 1%%\n",
+                               freq, i8254_cputimer.freq);
                tsc_frequency = 0;
        }
 
        if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
-               kprintf(
-"hw.calibrate_timers_with_rtc not set - using old calibration method\n");
+               kprintf("hw.calibrate_timers_with_rtc not "
+                       "set - using old calibration method\n");
                tsc_frequency = 0;
        }
 
@@ -883,8 +886,10 @@ skip_rtc_based:
                    tsc_invariant ? " invariant" : "",
                    (intmax_t)tsc_frequency);
        }
+       tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
 
-       EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST);
+       EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
+                             NULL, SHUTDOWN_PRI_LAST);
 }
 
 /*
@@ -1247,7 +1252,7 @@ static void
 tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
 {
        struct globaldata *gd = mycpu;
-       uint64_t test_end, test_begin;
+       tsc_uclock_t test_end, test_begin;
        u_int i;
 
        if (bootverbose) {
index 310d6fc..80d5b2a 100644 (file)
@@ -894,7 +894,7 @@ smp_invltlb(void)
        cpumask_t mask;
        unsigned long rflags;
 #ifdef LOOPRECOVER
-       uint64_t tsc_base = rdtsc();
+       tsc_uclock_t tsc_base = rdtsc();
        int repeats = 0;
 #endif
 
@@ -1115,7 +1115,7 @@ smp_inval_intr(void)
        struct mdglobaldata *md = mdcpu;
        cpumask_t cpumask;
 #ifdef LOOPRECOVER
-       uint64_t tsc_base = rdtsc();
+       tsc_uclock_t tsc_base = rdtsc();
 #endif
 
 #if 0
index a572da8..65e4ba7 100644 (file)
@@ -93,7 +93,7 @@ struct pmap_inval_info {
 #ifdef LOOPRECOVER
        cpumask_t       sigmask;
        int             failed;
-       int64_t         tsc_target;
+       tsc_uclock_t    tsc_target;
 #endif
 } __cachealign;
 
@@ -163,10 +163,10 @@ __inline
 int
 loopwdog(struct pmap_inval_info *info)
 {
-       int64_t tsc;
+       tsc_uclock_t tsc;
 
        tsc = rdtsc();
-       if (info->tsc_target - tsc < 0 && tsc_frequency) {
+       if ((tsc_sclock_t)(info->tsc_target - tsc) < 0 && tsc_frequency) {
                info->tsc_target = tsc + (tsc_frequency * LOOPRECOVER_TIMEOUT2);
                return 1;
        }
index a6f0678..e701ff3 100644 (file)
@@ -1223,7 +1223,7 @@ syscall2(struct trapframe *frame)
         *       is responsible for getting the MP lock.
         */
 #ifdef SYSCALL_DEBUG
-       uint64_t tscval = rdtsc();
+       tsc_uclock_t tscval = rdtsc();
 #endif
        error = (*callp->sy_call)(&args);
 #ifdef SYSCALL_DEBUG
index c145eef..ed49bfb 100644 (file)
@@ -15,6 +15,9 @@
 #include <sys/types.h>
 #endif
 
+typedef uint64_t tsc_uclock_t;
+typedef int64_t tsc_sclock_t;
+
 /*
  * i386 to clock driver interface.
  * XXX large parts of the driver and its interface are misplaced.
@@ -26,9 +29,10 @@ extern int   timer0_max_count;
 extern int     tsc_present;
 extern int     tsc_invariant;
 extern int     tsc_mpsync;
-extern int64_t tsc_frequency;
 extern int     tsc_is_broken;
 extern int     wall_cmos_clock;
+extern tsc_uclock_t tsc_frequency;
+extern tsc_uclock_t tsc_oneus_approx;  /* do not use for fine calc, min 1 */
 
 /*
  * Driver to clock driver interface.
index 9cbfc36..f6f7fbb 100644 (file)
@@ -115,7 +115,6 @@ u_int cpu_feature;  /* XXX */
 int tsc_present;
 int tsc_invariant;
 int tsc_mpsync;
-int64_t tsc_frequency;
 int optcpus;           /* number of cpus - see mp_start() */
 int cpu_bits;
 int lwp_cpu_lock;      /* if/how to lock virtual CPUs to real CPUs */
@@ -127,6 +126,9 @@ int vmm_enabled;    /* VMM HW assisted enable */
 int use_precise_timer = 0;     /* use a precise timer (more expensive) */
 struct privatespace *CPU_prvspace;
 
+tsc_uclock_t tsc_frequency;
+tsc_uclock_t tsc_oneus_approx;
+
 extern uint64_t KPML4phys;     /* phys addr of kernel level 4 */
 
 static struct trapframe proc0_tf;
@@ -481,6 +483,7 @@ main(int ac, char **av)
        sysctlbyname("hw.tsc_frequency", &tsc_frequency, &vsize, NULL, 0);
        if (tsc_present)
                cpu_feature |= CPUID_TSC;
+       tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
 
        /*
         * Check SSE
index ed29fcd..b40d882 100644 (file)
@@ -67,7 +67,7 @@
  * Initialize a lock.
  */
 #define BUF_LOCKINIT(bp) \
-       lockinit(&(bp)->b_lock, buf_wmesg, 0, 0)
+       lockinit(&(bp)->b_lock, buf_wmesg, 0, LK_NOCOLLSTATS)
 
 /*
  *
similarity index 65%
copy from sys/sys/microtime_pcpu.h
copy to sys/sys/indefinite.h
index 32e1a79..c6d8673 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2014 The DragonFly Project.  All rights reserved.
+ * Copyright (c) 2017 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to The DragonFly Project
- * by Sepherosa Ziehau <sepherosa@gmail.com>
+ * by Matthew Dillon <dillon@backplane.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
-#ifndef _SYS_MICROTIME_PCPU_H_
-#define _SYS_MICROTIME_PCPU_H_
-
-#ifndef _SYS_PARAM_H_
-#include <sys/param.h>
-#endif
-
-#ifndef _SYS_TIME_H_
-#include <sys/time.h>
-#endif
-
-#include <machine/clock.h>
-#include <machine/cpufunc.h>
+#ifndef _SYS_INDEFINITE_H_
+#define _SYS_INDEFINITE_H_
 
 /*
- * This 'time' only guarantees monotonicly increment on the same CPU
+ * Indefinite info collection and handling code for contention loops
  */
+#ifndef _MACHINE_CLOCK_H_
+#include <machine/clock.h>
+#endif
 
-union microtime_pcpu {
-       struct timeval  tv;
-       uint64_t        tsc;
-};
+extern int lock_test_mode;
 
-static __inline void
-microtime_pcpu_get(union microtime_pcpu *t)
-{
-       if (tsc_invariant)
-               t->tsc = rdtsc();
-       else
-               microuptime(&t->tv);
-}
+struct indefinite_info {
+       tsc_uclock_t    base;
+       const char      *ident;
+       int             secs;
+       int             count;
+       char            type;
+};
 
-static __inline int
-microtime_pcpu_diff(const union microtime_pcpu *s,
-    const union microtime_pcpu *e)
-{
-       if (tsc_invariant) {
-               return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
-       } else {
-               return ((e->tv.tv_usec - s->tv.tv_usec) +
-                       (e->tv.tv_sec - s->tv.tv_sec) * 1000000);
-       }
-}
+typedef struct indefinite_info indefinite_info_t;
 
-#endif /* !_SYS_MICROTIME_PCPU_H_ */
+#endif
diff --git a/sys/sys/indefinite2.h b/sys/sys/indefinite2.h
new file mode 100644 (file)
index 0000000..9d2f77a
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2017 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _SYS_INDEFINITE2_H_
+#define _SYS_INDEFINITE2_H_
+
+/*
+ * Indefinite info collection and handling code for contention loops
+ */
+#ifndef _SYS_INDEFINITE_H_
+#include <sys/indefinite.h>
+#endif
+#ifndef _SYS_GLOBALDATA_H_
+#include <sys/globaldata.h>
+#endif
+
+/*
+ * Initialize the indefinite state (only if the TSC is supported)
+ */
+static __inline void
+indefinite_init(indefinite_info_t *info, const char *ident, int now, char type)
+{
+       if (tsc_frequency) {
+               info->base = rdtsc();
+               info->ident = ident;
+               info->secs = 0;
+               info->count = 0;
+               info->type = type;
+
+               if (now) {
+                       mycpu->gd_cnt.v_lock_name[0] = info->type;
+                       strncpy(mycpu->gd_cnt.v_lock_name + 1, info->ident,
+                               sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+               }
+       }
+}
+
+/*
+ * Update the state during any loop, record collision time in microseconds.
+ */
+static __inline int
+indefinite_check(indefinite_info_t *info)
+{
+       tsc_uclock_t delta;
+       const char *str;
+
+#ifdef _KERNEL_VIRTUAL
+       pthread_yield();
+#else
+       cpu_pause();
+#endif
+       if (info->type == 0)
+               return FALSE;
+       if (++info->count != 128)
+               return FALSE;
+       info->count = 0;
+       delta = rdtsc() - info->base;
+
+       /*
+        * Ignore minor one-second interval error accumulation in
+        * favor of ensuring that info->base is fully synchronized.
+        */
+       if (info->secs == 0 && delta > tsc_oneus_approx) {
+               mycpu->gd_cnt.v_lock_name[0] = info->type;
+               strncpy(mycpu->gd_cnt.v_lock_name + 1, info->ident,
+                       sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+       }
+       if (delta >= tsc_frequency) {
+               info->secs += delta / tsc_frequency;
+               info->base += delta;
+               mycpu->gd_cnt.v_lock_colls += delta / tsc_frequency * 1000000U;
+
+               switch(info->type) {
+               case 's':
+                       str = "spin_lock_sh";
+                       break;
+               case 'S':
+                       str = "spin_lock_ex";
+                       break;
+               case 'm':
+                       str = "mutex_sh";
+                       break;
+               case 'M':
+                       str = "mutex_ex";
+                       break;
+               case 'l':
+                       str = "lock_sh";
+                       break;
+               case 'L':
+                       str = "lock_ex";
+                       break;
+               case 't':
+                       str = "token";
+                       break;
+               default:
+                       str = "lock(?)";
+                       break;
+               }
+               kprintf("%s: %s, indefinite wait (%d secs)!\n",
+                       str, info->ident, info->secs);
+               if (panicstr)
+                       return TRUE;
+#if defined(INVARIANTS)
+               if (lock_test_mode) {
+                       print_backtrace(-1);
+                       return TRUE;
+               }
+#endif
+#if defined(INVARIANTS)
+               if (info->secs == 11 &&
+                   (info->type == 's' || info->type == 'S')) {
+                       print_backtrace(-1);
+               }
+#endif
+               if (info->secs == 60 &&
+                   (info->type == 's' || info->type == 'S')) {
+                       panic("%s: %s, indefinite wait!", str, info->ident);
+               }
+
+       }
+       return FALSE;
+}
+
+/*
+ * Finalize the state, record collision time in microseconds.
+ */
+static __inline void
+indefinite_done(indefinite_info_t *info)
+{
+       tsc_uclock_t delta;
+
+       if (info->type) {
+               delta = rdtsc() - info->base;
+               delta = delta * 1000000U / tsc_frequency;
+               if (lock_test_mode && delta > 1000)
+                       kprintf("TEST %s (%lu)\n", info->ident, delta);
+               mycpu->gd_cnt.v_lock_colls += delta;
+               info->type = 0;
+       }
+}
+
+#endif
index d60ef13..42c34d8 100644 (file)
@@ -163,10 +163,11 @@ struct lock {
  * The first three flags may be set in lock_init to set their mode permanently,
  * or passed in as arguments to the lock manager.
  */
-#define LK_EXTFLG_MASK 0x07000070      /* mask of external flags */
+#define LK_EXTFLG_MASK 0x070000F0      /* mask of external flags */
 #define LK_NOWAIT      0x00000010      /* do not sleep to await lock */
 #define LK_SLEEPFAIL   0x00000020      /* sleep, then return failure */
 #define LK_CANRECURSE  0x00000040      /* allow recursive exclusive lock */
+#define LK_NOCOLLSTATS 0x00000080      /* v_lock_coll not applicable */
 #define        LK_CANCELABLE   0x01000000      /* blocked caller can be canceled */
 #define LK_TIMELOCK    0x02000000
 #define LK_PCATCH      0x04000000      /* timelocked with signal catching */
index 32e1a79..7cd73c3 100644 (file)
@@ -66,10 +66,11 @@ microtime_pcpu_get(union microtime_pcpu *t)
 
 static __inline int
 microtime_pcpu_diff(const union microtime_pcpu *s,
-    const union microtime_pcpu *e)
+                   const union microtime_pcpu *e)
 {
        if (tsc_invariant) {
-               return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
+               return (((e->tsc - s->tsc) * 1000000) /
+                       (tsc_sclock_t)tsc_frequency);
        } else {
                return ((e->tv.tv_usec - s->tv.tv_usec) +
                        (e->tv.tv_sec - s->tv.tv_sec) * 1000000);
index 51b7ea2..38fa18b 100644 (file)
@@ -65,17 +65,20 @@ typedef struct mtx_link     mtx_link_t;
 
 struct mtx {
        volatile u_int  mtx_lock;
-       int             mtx_reserved01; /* future use & struct alignmnent */
+       uint32_t        mtx_flags;
        struct thread   *mtx_owner;
        mtx_link_t      *mtx_exlink;
        mtx_link_t      *mtx_shlink;
        const char      *mtx_ident;
 } __cachealign;
 
+#define MTXF_NOCOLLSTATS       0x00000001      /* v_lock_coll not applicable */
+
 typedef struct mtx     mtx_t;
 typedef u_int          mtx_state_t;
 
-#define MTX_INITIALIZER(ident) { .mtx_lock = 0, .mtx_owner = NULL, \
+#define MTX_INITIALIZER(ident) { .mtx_lock = 0, .mtx_flags = 0,        \
+                                 .mtx_owner = NULL,                    \
                                  .mtx_exlink = NULL, .mtx_shlink = NULL, \
                                  .mtx_ident = ident }
 
index b03d1a4..a0bc402 100644 (file)
@@ -53,6 +53,18 @@ static __inline void
 mtx_init(mtx_t *mtx, const char *ident)
 {
        mtx->mtx_lock = 0;
+       mtx->mtx_flags = 0;
+       mtx->mtx_owner = NULL;
+       mtx->mtx_exlink = NULL;
+       mtx->mtx_shlink = NULL;
+       mtx->mtx_ident = ident;
+}
+
+static __inline void
+mtx_init_flags(mtx_t *mtx, const char *ident, uint32_t flags)
+{
+       mtx->mtx_lock = 0;
+       mtx->mtx_flags = flags;
        mtx->mtx_owner = NULL;
        mtx->mtx_exlink = NULL;
        mtx->mtx_shlink = NULL;
index 0523731..b4819ce 100644 (file)
@@ -35,6 +35,9 @@
 #ifndef _SYS_IOSCHED_H_
 #include <sys/iosched.h>
 #endif
+#ifndef _SYS_INDEFINITE_H_
+#include <sys/indefinite.h>
+#endif
 #include <machine/thread.h>
 
 struct globaldata;
@@ -272,6 +275,7 @@ struct thread {
     int                td_in_crit_report;      
 #endif
     struct md_thread td_mach;
+    indefinite_info_t  td_indefinite;
 #ifdef DEBUG_LOCKS
 #define SPINLOCK_DEBUG_ARRAY_SIZE      32
    int         td_spinlock_stack_id[SPINLOCK_DEBUG_ARRAY_SIZE];
index a5f8624..2a101d4 100644 (file)
@@ -35,6 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/types.h>
+#include <machine/clock.h>
 #else
 #include <machine/stdint.h>
 #endif
@@ -221,11 +222,12 @@ int       tvtohz_high(struct timeval *);
 int    tvtohz_low(struct timeval *);
 int    tstohz_high(struct timespec *);
 int    tstohz_low(struct timespec *);
-int64_t        tsc_get_target(int ns);
 int    tsc_test_target(int64_t target);
 void   tsc_delay(int ns);
 int    nanosleep1(struct timespec *rqt, struct timespec *rmt);
 
+tsc_uclock_t tsc_get_target(int ns);
+
 #else /* !_KERNEL */
 
 #include <time.h>
index 2af2e01..af1224e 100644 (file)
@@ -1031,8 +1031,8 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
        } else {
                nmp = objcache_get(nfsmount_objcache, M_WAITOK);
                bzero((caddr_t)nmp, sizeof (struct nfsmount));
-               mtx_init(&nmp->nm_rxlock, "nfsrx");
-               mtx_init(&nmp->nm_txlock, "nfstx");
+               mtx_init_flags(&nmp->nm_rxlock, "nfsrx", MTXF_NOCOLLSTATS);
+               mtx_init_flags(&nmp->nm_txlock, "nfstx", MTXF_NOCOLLSTATS);
                TAILQ_INIT(&nmp->nm_uidlruhead);
                TAILQ_INIT(&nmp->nm_bioq);
                TAILQ_INIT(&nmp->nm_reqq);