From 050032ec7b66c8e64eeacdcd933e9f72ec8ea486 Mon Sep 17 00:00:00 2001
From: Matthew Dillon <dillon@apollo.backplane.com>
Date: Thu, 7 Nov 2013 23:22:20 -0800
Subject: [PATCH] kernel - Improve SMP collision statistics

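* Populate the per-cpu collision counter and label from the spinlock,
  lockmgr lock, and mutex code.  The token code already populated them;
  the fields are renamed from v_token_colls/v_token_name to
  v_lock_colls/v_lock_name to reflect the wider use.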

* Pass __func__ to the spinlock routines (via the spin_lock*() wrapper
  macros) so it can be copied into the per-cpu collision label and
  reported in the indefinite-wait diagnostics.
---
 sys/kern/kern_lock.c     | 28 ++++++++++++++++++++++++++
 sys/kern/kern_mutex.c    | 13 ++++++++++++
 sys/kern/kern_spinlock.c | 43 ++++++++++++++++++++--------------------
 sys/kern/kern_synch.c    |  2 +-
 sys/kern/lwkt_thread.c   |  4 ++--
 sys/kern/lwkt_token.c    |  7 ++++---
 sys/sys/spinlock2.h      | 35 ++++++++++++++++----------------
 sys/sys/vmmeter.h        |  4 ++--
 8 files changed, 90 insertions(+), 46 deletions(-)

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index efbf200c18..38594580f9 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -181,6 +181,13 @@ again:
 					      count | LKC_SHREQ)) {
 				goto again;
 			}
+
+			mycpu->gd_cnt.v_lock_name[0] = 'S';
+			strncpy(mycpu->gd_cnt.v_lock_name + 1,
+				lkp->lk_wmesg,
+				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+			++mycpu->gd_cnt.v_lock_colls;
+
 			error = tsleep(lkp, pflags | PINTERLOCKED,
 				       lkp->lk_wmesg, timo);
 			if (error)
@@ -254,6 +261,13 @@ again:
 				       count | LKC_EXREQ)) {
 			goto again;
 		}
+
+		mycpu->gd_cnt.v_lock_name[0] = 'X';
+		strncpy(mycpu->gd_cnt.v_lock_name + 1,
+			lkp->lk_wmesg,
+			sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+		++mycpu->gd_cnt.v_lock_colls;
+
 		error = tsleep(lkp, pflags | PINTERLOCKED,
 			       lkp->lk_wmesg, timo);
 		if (error)
@@ -369,6 +383,13 @@ again:
 		if (atomic_cmpset_int(&lkp->lk_count, count,
 				      (count - 1) | wflags)) {
 			COUNT(td, -1);
+
+			mycpu->gd_cnt.v_lock_name[0] = 'U';
+			strncpy(mycpu->gd_cnt.v_lock_name + 1,
+				lkp->lk_wmesg,
+				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+			++mycpu->gd_cnt.v_lock_colls;
+
 			error = tsleep(lkp, pflags | PINTERLOCKED,
 				       lkp->lk_wmesg, timo);
 			if (error)
@@ -412,6 +433,13 @@ again:
 			timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
 			tsleep_interlock(lkp, pflags);
 			if (atomic_cmpset_int(&lkp->lk_count, count, count)) {
+
+				mycpu->gd_cnt.v_lock_name[0] = 'U';
+				strncpy(mycpu->gd_cnt.v_lock_name + 1,
+					lkp->lk_wmesg,
+					sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+				++mycpu->gd_cnt.v_lock_colls;
+
 				error = tsleep(lkp, pflags | PINTERLOCKED,
 					       lkp->lk_wmesg, timo);
 				if (error) {
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 590077b033..81020a49e3 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -158,6 +158,12 @@ __mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to)
 				atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
 				--td->td_critcount;
 
+				mycpu->gd_cnt.v_lock_name[0] = 'X';
+				strncpy(mycpu->gd_cnt.v_lock_name + 1,
+					ident,
+					sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+				++mycpu->gd_cnt.v_lock_colls;
+
 				error = tsleep(link, flags | PINTERLOCKED,
 					       ident, to);
 				++mtx_contention_count;
@@ -250,6 +256,13 @@ __mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
 			nlock = lock | MTX_SHWANTED;
 			tsleep_interlock(mtx, 0);
 			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
+
+				mycpu->gd_cnt.v_lock_name[0] = 'S';
+				strncpy(mycpu->gd_cnt.v_lock_name + 1,
+					ident,
+					sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+				++mycpu->gd_cnt.v_lock_colls;
+
 				error = tsleep(mtx, flags | PINTERLOCKED,
 					       ident, to);
 				if (error)
diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c
index e416b5a42d..a2764d8c27 100644
--- a/sys/kern/kern_spinlock.c
+++ b/sys/kern/kern_spinlock.c
@@ -75,6 +75,7 @@ struct spinlock pmap_spin = SPINLOCK_INITIALIZER(pmap_spin);
 struct indefinite_info {
 	sysclock_t	base;
 	int		secs;
+	const char	*ident;
 };
 
 /*
@@ -99,16 +100,6 @@ KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
 static int spin_lock_test_mode;
 #endif
 
-static int64_t spinlocks_contested1;
-SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
-    &spinlocks_contested1, 0,
-    "Spinlock contention count due to collisions with exclusive lock holders");
-
-static int64_t spinlocks_contested2;
-SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
-    &spinlocks_contested2, 0,
-    "Serious spinlock contention count");
-
 #ifdef DEBUG_LOCKS_LATENCY
 
 static long spinlocks_add_latency;
@@ -146,7 +137,6 @@ spin_trylock_contested(struct spinlock *spin)
 {
 	globaldata_t gd = mycpu;
 
-	/*++spinlocks_contested1;*/
 	/*atomic_add_int(&spin->counta, -1);*/
 	--gd->gd_spinlocks;
 	--gd->gd_curthread->td_critcount;
@@ -192,9 +182,9 @@ spin_trylock_contested(struct spinlock *spin)
  *	as well (no difference).
  */
 void
-spin_lock_contested(struct spinlock *spin)
+_spin_lock_contested(struct spinlock *spin, const char *ident)
 {
-	struct indefinite_info info = { 0, 0 };
+	struct indefinite_info info = { 0, 0, ident };
 	int i;
 
 	/*
@@ -248,6 +238,11 @@ spin_lock_contested(struct spinlock *spin)
 			break;
 		}
 		if ((++i & 0x7F) == 0x7F) {
+			mycpu->gd_cnt.v_lock_name[0] = 'X';
+			strncpy(mycpu->gd_cnt.v_lock_name + 1,
+				ident,
+				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+			++mycpu->gd_cnt.v_lock_colls;
 #if defined(INVARIANTS)
 			++spin->countb;
 #endif
@@ -267,9 +262,9 @@ spin_lock_contested(struct spinlock *spin)
  * The caller has not modified counta.
  */
 void
-spin_lock_shared_contested2(struct spinlock *spin)
+_spin_lock_shared_contested(struct spinlock *spin, const char *ident)
 {
-	struct indefinite_info info = { 0, 0 };
+	struct indefinite_info info = { 0, 0, ident };
 	int i;
 
 #ifdef DEBUG_LOCKS_LATENCY
@@ -320,6 +315,11 @@ spin_lock_shared_contested2(struct spinlock *spin)
 				break;
 		}
 		if ((++i & 0x7F) == 0x7F) {
+			mycpu->gd_cnt.v_lock_name[0] = 'S';
+			strncpy(mycpu->gd_cnt.v_lock_name + 1,
+				ident,
+				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
+			++mycpu->gd_cnt.v_lock_colls;
 #if defined(INVARIANTS)
 			++spin->countb;
 #endif
@@ -347,12 +347,12 @@ _spin_pool_hash(void *ptr)
 }
 
 void
-_spin_pool_lock(void *chan)
+_spin_pool_lock(void *chan, const char *ident)
 {
 	struct spinlock *sp;
 
 	sp = &pool_spinlocks[_spin_pool_hash(chan)].spin;
-	spin_lock(sp);
+	_spin_lock(sp, ident);
 }
 
 void
@@ -378,8 +378,8 @@ spin_indefinite_check(struct spinlock *spin, struct indefinite_info *info)
 		info->base = count;
 		++info->secs;
 	} else if (count - info->base > sys_cputimer->freq) {
-		kprintf("spin_lock: %p, indefinite wait (%d secs)!\n",
-			spin, info->secs);
+		kprintf("spin_lock: %s(%p), indefinite wait (%d secs)!\n",
+			info->ident, spin, info->secs);
 		info->base = count;
 		++info->secs;
 		if (panicstr)
@@ -395,7 +395,8 @@ spin_indefinite_check(struct spinlock *spin, struct indefinite_info *info)
 			print_backtrace(-1);
 #endif
 		if (info->secs == 60)
-			panic("spin_lock: %p, indefinite wait!", spin);
+			panic("spin_lock: %s(%p), indefinite wait!",
+			      info->ident, spin);
 	}
 	return (FALSE);
 }
@@ -449,7 +450,7 @@ sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
 
 		spin_init(&spin);
 		for (i = spin_test_count; i > 0; --i) {
-		    spin_lock_quick(gd, &spin);
+		    _spin_lock_quick(gd, &spin, "test");
 		    spin_unlock_quick(gd, &spin);
 		}
 	}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 6982c29105..570efdac98 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -702,7 +702,7 @@ ssleep(const volatile void *ident, struct spinlock *spin, int flags,
 	_tsleep_interlock(gd, ident, flags);
 	spin_unlock_quick(gd, spin);
 	error = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
-	spin_lock_quick(gd, spin);
+	_spin_lock_quick(gd, spin, wmesg);
 
 	return (error);
 }
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index 40d15701dc..538976f810 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -719,7 +719,7 @@ lwkt_switch(void)
 #ifdef	INVARIANTS
 	++ntd->td_contended;
 #endif
-	++gd->gd_cnt.v_token_colls;
+	++gd->gd_cnt.v_lock_colls;
 
 	if (fairq_bypass > 0)
 		goto skip;
@@ -749,7 +749,7 @@ lwkt_switch(void)
 #ifdef	INVARIANTS
 		++ntd->td_contended;
 #endif
-		++gd->gd_cnt.v_token_colls;
+		++gd->gd_cnt.v_lock_colls;
 	}
 
 skip:
diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c
index 4a17614fc2..fdec0d0c36 100644
--- a/sys/kern/lwkt_token.c
+++ b/sys/kern/lwkt_token.c
@@ -476,10 +476,11 @@ lwkt_getalltokens(thread_t td, int spinning)
 			 * Otherwise we failed to acquire all the tokens.
 			 * Release whatever we did get.
 			 */
-			KASSERT(tok->t_desc, ("token %p is not initialized", tok));
-			strncpy(td->td_gd->gd_cnt.v_token_name,
+			KASSERT(tok->t_desc,
+				("token %p is not initialized", tok));
+			strncpy(td->td_gd->gd_cnt.v_lock_name,
 				tok->t_desc,
-				sizeof(td->td_gd->gd_cnt.v_token_name) - 1);
+				sizeof(td->td_gd->gd_cnt.v_lock_name) - 1);
 
 			if (lwkt_sched_debug > 0) {
 				--lwkt_sched_debug;
diff --git a/sys/sys/spinlock2.h b/sys/sys/spinlock2.h
index 73aa7e542a..e08da392b0 100644
--- a/sys/sys/spinlock2.h
+++ b/sys/sys/spinlock2.h
@@ -54,11 +54,17 @@
 extern struct spinlock pmap_spin;
 
 int spin_trylock_contested(struct spinlock *spin);
-void spin_lock_contested(struct spinlock *spin);
-void spin_lock_shared_contested2(struct spinlock *spin);
-void _spin_pool_lock(void *chan);
+void _spin_lock_contested(struct spinlock *spin, const char *ident);
+void _spin_lock_shared_contested(struct spinlock *spin, const char *ident);
+void _spin_pool_lock(void *chan, const char *ident);
 void _spin_pool_unlock(void *chan);
 
+#define spin_lock(spin)			_spin_lock(spin, __func__)
+#define spin_lock_quick(gd, spin)	_spin_lock_quick(gd, spin, __func__)
+#define spin_lock_shared(spin)		_spin_lock_shared(spin, __func__)
+#define spin_lock_shared_quick(gd, spin)	_spin_lock_shared_quick(gd, spin, __func__)
+#define spin_pool_lock(chan)		_spin_pool_lock(chan, __func__)
+
 /*
  * Attempt to obtain an exclusive spinlock.  Returns FALSE on failure,
  * TRUE on success.
@@ -101,14 +107,14 @@ spin_held(struct spinlock *spin)
  * Obtain an exclusive spinlock and return.
  */
 static __inline void
-spin_lock_quick(globaldata_t gd, struct spinlock *spin)
+_spin_lock_quick(globaldata_t gd, struct spinlock *spin, const char *ident)
 {
 	++gd->gd_curthread->td_critcount;
 	cpu_ccfence();
 	++gd->gd_spinlocks;
 	atomic_add_int(&spin->counta, 1);
 	if (spin->counta != 1)
-		spin_lock_contested(spin);
+		_spin_lock_contested(spin, ident);
 #ifdef DEBUG_LOCKS
 	int i;
 	for (i = 0; i < SPINLOCK_DEBUG_ARRAY_SIZE; i++) {
@@ -124,9 +130,9 @@ spin_lock_quick(globaldata_t gd, struct spinlock *spin)
 }
 
 static __inline void
-spin_lock(struct spinlock *spin)
+_spin_lock(struct spinlock *spin, const char *ident)
 {
-	spin_lock_quick(mycpu, spin);
+	_spin_lock_quick(mycpu, spin, ident);
 }
 
 /*
@@ -177,13 +183,14 @@ spin_unlock(struct spinlock *spin)
  * Shared spinlocks
  */
 static __inline void
-spin_lock_shared_quick(globaldata_t gd, struct spinlock *spin)
+_spin_lock_shared_quick(globaldata_t gd, struct spinlock *spin,
+			const char *ident)
 {
 	++gd->gd_curthread->td_critcount;
 	cpu_ccfence();
 	++gd->gd_spinlocks;
 	if (atomic_cmpset_int(&spin->counta, 0, SPINLOCK_SHARED | 1) == 0)
-		spin_lock_shared_contested2(spin);
+		_spin_lock_shared_contested(spin, ident);
 #ifdef DEBUG_LOCKS
 	int i;
 	for (i = 0; i < SPINLOCK_DEBUG_ARRAY_SIZE; i++) {
@@ -238,9 +245,9 @@ spin_unlock_shared_quick(globaldata_t gd, struct spinlock *spin)
 }
 
 static __inline void
-spin_lock_shared(struct spinlock *spin)
+_spin_lock_shared(struct spinlock *spin, const char *ident)
 {
-	spin_lock_shared_quick(mycpu, spin);
+	_spin_lock_shared_quick(mycpu, spin, ident);
 }
 
 static __inline void
@@ -249,12 +256,6 @@ spin_unlock_shared(struct spinlock *spin)
 	spin_unlock_shared_quick(mycpu, spin);
 }
 
-static __inline void
-spin_pool_lock(void *chan)
-{
-	_spin_pool_lock(chan);
-}
-
 static __inline void
 spin_pool_unlock(void *chan)
 {
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 1a62ec08b0..35ea404fcc 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -102,8 +102,8 @@ struct vmmeter {
 	u_int v_waitsys;	/* calls to waitsys() */
 	u_int v_smpinvltlb;	/* nasty global invltlbs */
 	u_int v_ppwakeups;	/* wakeups on processes stalled on VM */
-	u_int v_token_colls;	/* # of token collisions */
-	char  v_token_name[16];	/* last-colliding token name */
+	u_int v_lock_colls;	/* # of token, lock, or spin collisions */
+	char  v_lock_name[16];	/* last-colliding token, lock, or spin name */
 	u_int v_reserved6;
 	u_int v_reserved7;
 #define vmmeter_uint_end	v_reserved7
-- 
2.41.0