kill db_print_backtrace()
[dragonfly.git] / sys / kern / kern_spinlock.c

/*
 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu. and Matthew Dillon
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef INVARIANTS
#include <sys/proc.h>
#endif
#include <sys/priv.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/clock.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/ktr.h>

#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256

#ifdef SMP

/*
 * Kernel Trace
 */
#if !defined(KTR_SPIN_CONTENTION)
#define KTR_SPIN_CONTENTION	KTR_ALL
#endif
#define SPIN_STRING	"spin=%p type=%c"
#define SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))

KTR_INFO_MASTER(spin);
KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2,
	 "spin=%p bo1=%d thr=%p bo=%d",
	 ((2 * sizeof(void *)) + (2 * sizeof(int))));
KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE);

#define logspin(name, mtx, type)			\
	KTR_LOG(spin_ ## name, mtx, type)

#define logspin_backoff(mtx, bo1, thr, bo)		\
	KTR_LOG(spin_backoff, mtx, bo1, thr, bo)

#ifdef INVARIANTS
static int spin_lock_test_mode;
#endif

static int64_t spinlocks_contested1;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
	    &spinlocks_contested1, 0, "");

static int64_t spinlocks_contested2;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
	    &spinlocks_contested2, 0, "");

static int spinlocks_backoff_limit = BACKOFF_LIMIT;
SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW,
	   &spinlocks_backoff_limit, 0, "");

struct exponential_backoff {
	int backoff;
	int nsec;
	struct spinlock *mtx;
	sysclock_t base;
};
static int exponential_backoff(struct exponential_backoff *bo);

static __inline
void
exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
{
	bo->backoff = BACKOFF_INITIAL;
	bo->nsec = 0;
	bo->mtx = mtx;
}

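/*
 * Illustration only (not part of the original file, not compiled): the two
 * helpers above are meant to be driven in a retry loop of roughly this
 * shape, which is the pattern the contested paths below follow.  The
 * lock_attempt() predicate is hypothetical.
 */
#if 0
	struct exponential_backoff bo;

	exponential_init(&bo, mtx);
	while (lock_attempt(mtx) == FALSE) {	/* hypothetical predicate */
		if (exponential_backoff(&bo))
			break;	/* TRUE: stop spinning (panic handover or test mode) */
	}
#endif
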
/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.  We have to undo the mess
 * we created by returning the shared locks.
 *
 * If there was another exclusive lock holder, the exclusive bit in
 * value will be the only bit set.  We don't have to do anything since
 * restoration does not involve any work.
 *
 * Otherwise we successfully obtained the exclusive bit.  Attempt to
 * clear the shared bits.  If we are able to clear the shared bits
 * we win.  Otherwise we lose and we have to restore the shared bits
 * we couldn't clear (and also clear our exclusive bit).
 */
int
spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value)
{
	int bit;

	++spinlocks_contested1;
	if ((value & SPINLOCK_EXCLUSIVE) == 0) {
		while (value) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
				atomic_swap_int(&mtx->lock, value);
				--gd->gd_spinlocks_wr;
				return (FALSE);
			}
			value &= ~(1 << bit);
		}
		return (TRUE);
	}
	--gd->gd_spinlocks_wr;
	return (FALSE);
}

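/*
 * Illustration only (not part of the original file): mtx->lock packs the
 * SPINLOCK_EXCLUSIVE bit together with one cached-shared bit per cpu
 * (gd_cpumask); the exact bit positions are defined in sys/spinlock.h and
 * the values below are assumed for the example.  If cpus 1 and 3 held
 * cached read access when our exclusive bit was swapped in, the restore
 * path above simply swaps their bits back (which also drops our exclusive
 * bit) as soon as it finds an owner whose gd_spinlock_rd still points at
 * the lock.
 */
#if 0
	value = (1 << 1) | (1 << 3);		/* shared bits we clobbered */
	atomic_swap_int(&mtx->lock, value);	/* restore them, clearing our exclusive bit */
#endif
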
/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.
 *
 * NOTE: If value indicates an exclusively held mutex, no shared bits
 *	 would have been set and we can throw away value.
 */
void
spin_lock_wr_contested(struct spinlock *mtx, int value)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int bit;
	int mask;

	/*
	 * Wait until we can gain exclusive access vs another exclusive
	 * holder.
	 */
	exponential_init(&backoff, mtx);
	++spinlocks_contested1;
	logspin(beg, mtx, 'w');

	while (value & SPINLOCK_EXCLUSIVE) {
		value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
		if (exponential_backoff(&backoff)) {
			value &= ~SPINLOCK_EXCLUSIVE;
			break;
		}
	}

	/*
	 * Kill the cached shared bit for our own cpu.  This is the most
	 * common case and there's no sense wasting cpu on it.  Since
	 * spinlocks aren't recursive, we can't own a shared ref on the
	 * spinlock while trying to get an exclusive one.
	 *
	 * If multiple bits are set do not stall on any single cpu.  Check
	 * all cpus that have the cache bit set, then loop and check again,
	 * until we've cleaned all the bits.
	 */
	value &= ~gd->gd_cpumask;

	while ((mask = value) != 0) {
		while (mask) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
				value &= ~(1 << bit);
			} else if (exponential_backoff(&backoff)) {
				value = 0;
				break;
			}
			mask &= ~(1 << bit);
		}
	}
	logspin(end, mtx, 'w');
}
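
/*
 * Worked example for the loop above (illustrative, not part of the original
 * file): suppose cpus 0, 2 and 5 hold cached shared bits and we are cpu 2.
 * On entry value == 0x25; clearing our own gd_cpumask bit leaves 0x21, and
 * the double loop then checks the remaining owners (cpu 0 and cpu 5),
 * clearing each bit once that cpu's gd_spinlock_rd no longer points at this
 * spinlock and backing off otherwise.
 */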

/*
 * The cache bit wasn't set for our cpu.  Loop until we can set the bit.
 * As with the spin_lock_rd() inline we need a memory fence after setting
 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
 * that field to clear.
 */
void
spin_lock_rd_contested(struct spinlock *mtx)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int value = mtx->lock;

	/*
	 * Shortcut the op if we can just set the cache bit.  This case
	 * occurs when the last lock was an exclusive lock.
	 */
	while ((value & SPINLOCK_EXCLUSIVE) == 0) {
		if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
			return;
		value = mtx->lock;
	}

	exponential_init(&backoff, mtx);
	++spinlocks_contested1;

	logspin(beg, mtx, 'r');

	while ((value & gd->gd_cpumask) == 0) {
		if (value & SPINLOCK_EXCLUSIVE) {
			gd->gd_spinlock_rd = NULL;
			if (exponential_backoff(&backoff)) {
				gd->gd_spinlock_rd = mtx;
				break;
			}
			gd->gd_spinlock_rd = mtx;
			cpu_mfence();
		} else {
			if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
				break;
		}
		value = mtx->lock;
	}
	logspin(end, mtx, 'r');
}

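/*
 * For reference, a sketch of the read-lock fast path that the contested
 * routine above backs up.  This is an illustration reconstructed from the
 * comments in this file; the real inline lives in sys/spinlock2.h and may
 * differ in detail.
 */
#if 0
static __inline void
spin_lock_rd_quick_sketch(globaldata_t gd, struct spinlock *mtx)
{
	gd->gd_spinlock_rd = mtx;		/* advertise our read intent */
	cpu_mfence();				/* interlock vs exclusive waiters */
	if ((mtx->lock & gd->gd_cpumask) == 0)
		spin_lock_rd_contested(mtx);	/* slow path above */
}
#endif
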
/*
 * Handle exponential backoff and indefinite waits.
 *
 * If the system is handling a panic we hand the spinlock over to the caller
 * after 1 second.  After 10 seconds we attempt to print a debugger
 * backtrace.  We also run pending interrupts in order to allow a console
 * break into DDB.
 */
static
int
exponential_backoff(struct exponential_backoff *bo)
{
	sysclock_t count;
	int backoff;

#ifdef _RDTSC_SUPPORTED_
	if (cpu_feature & CPUID_TSC) {
		backoff =
			(((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) &
			 (bo->backoff - 1)) + BACKOFF_INITIAL;
	} else
#endif
		backoff = bo->backoff;
	logspin_backoff(bo->mtx, bo->backoff, curthread, backoff);

	/*
	 * Quick backoff
	 */
	for (; backoff; --backoff)
		cpu_pause();
	if (bo->backoff < spinlocks_backoff_limit) {
		bo->backoff <<= 1;
		return (FALSE);
	} else {
		bo->backoff = BACKOFF_INITIAL;
	}

	logspin(bofail, bo->mtx, 'u');

	/*
	 * Indefinite
	 */
	++spinlocks_contested2;
	cpu_spinlock_contested();
	if (bo->nsec == 0) {
		bo->base = sys_cputimer->count();
		bo->nsec = 1;
	}

	count = sys_cputimer->count();
	if (count - bo->base > sys_cputimer->freq) {
		kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
		if (panicstr)
			return (TRUE);
#if defined(INVARIANTS)
		if (spin_lock_test_mode) {
			print_backtrace();
			return (TRUE);
		}
#endif
		++bo->nsec;
#if defined(INVARIANTS)
		if (bo->nsec == 11)
			print_backtrace();
#endif
		if (bo->nsec == 60)
			panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
		splz();
		bo->base = count;
	}
	return (FALSE);
}

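/*
 * Worked example of the schedule above (illustrative, not part of the
 * original file): with BACKOFF_INITIAL == 1 and the default
 * debug.spinlocks_bolim of 256, successive contested retries spin for a
 * (TSC-randomized, when available) count drawn from windows of roughly
 * 1, 2, 4, ... 256 cpu_pause()s.  Once the window reaches the limit it is
 * reset and the call falls through to the indefinite-wait bookkeeping:
 * a console message roughly every second, a backtrace after about 10
 * seconds under INVARIANTS, and a panic after about a minute.
 */
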
/*
 * If INVARIANTS is enabled various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */

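/*
 * Example (illustrative, not part of the original file): the tests are
 * driven from userland by writing the mode number to the sysctl, e.g. with
 * sysctlbyname(3) in a small test program, or equivalently with
 * "sysctl debug.spin_lock_test=2" from the shell.
 */
#if 0
	int mode = 2;	/* time best-case exclusive lock overhead */

	sysctlbyname("debug.spin_lock_test", NULL, NULL, &mode, sizeof(mode));
#endif
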
#ifdef INVARIANTS

static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");

static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = priv_check(curthread, PRIV_ROOT)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_wr_quick(gd, &mtx);
			spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_rd_quick(gd, &mtx);
			spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
	    0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");

#endif	/* INVARIANTS */
#endif	/* SMP */