kernel - Provide descriptions for lwkt.* and debug.* sysctl's
[dragonfly.git] / sys / kern / kern_mplock.c
/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Helper functions for MP lock acquisition and release.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>
#include <sys/spinlock.h>
#include <sys/ktr.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>
#include <sys/spinlock2.h>

#ifdef SMP
static int chain_mplock = 0;
static int bgl_yield = 10;
static __int64_t mplock_contention_count = 0;

SYSCTL_INT(_lwkt, OID_AUTO, chain_mplock, CTLFLAG_RW, &chain_mplock, 0,
    "Chain IPIs to other CPUs potentially needing the MP lock when it is yielded");
SYSCTL_INT(_lwkt, OID_AUTO, bgl_yield_delay, CTLFLAG_RW, &bgl_yield, 0,
    "Duration, in microseconds, of the delay when the MP lock is temporarily yielded");
SYSCTL_QUAD(_lwkt, OID_AUTO, mplock_contention_count, CTLFLAG_RW,
    &mplock_contention_count, 0, "Number of contested MP lock acquisition attempts");

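/*
 * Illustration only (not part of this file): the declarations above place
 * these tunables under the lwkt sysctl tree, where they can be read or
 * adjusted from userland with sysctlbyname(3).  A minimal sketch, assuming
 * the node names created above:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int64_t count;
 *		size_t len = sizeof(count);
 *
 *		if (sysctlbyname("lwkt.mplock_contention_count",
 *				 &count, &len, NULL, 0) == 0)
 *			printf("contested acquisitions: %lld\n",
 *			       (long long)count);
 *		return (0);
 *	}
 */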
/*
 * Kernel Trace
 */
#if !defined(KTR_GIANT_CONTENTION)
#define KTR_GIANT_CONTENTION	KTR_ALL
#endif

KTR_INFO_MASTER(giant);
KTR_INFO(KTR_GIANT_CONTENTION, giant, beg, 0,
	"thread=%p held %s:%-5d want %s:%-5d",
	sizeof(void *) * 3 + sizeof(int) * 2);
KTR_INFO(KTR_GIANT_CONTENTION, giant, end, 1,
	"thread=%p held %s:%-5d want %s:%-5d",
	sizeof(void *) * 3 + sizeof(int) * 2);

#define loggiant(name)						\
	KTR_LOG(giant_ ## name, curthread,			\
		mp_lock_holder_file, mp_lock_holder_line,	\
		file, line)

int	mp_lock;
int	cpu_contention_mask;
const char *mp_lock_holder_file;	/* debugging */
int	mp_lock_holder_line;		/* debugging */

/*
 * Sets up the initial MP lock state near the start of the kernel boot.
 */
void
cpu_get_initial_mplock(void)
{
	mp_lock = 0;			/* cpu 0 */
	curthread->td_mpcount = 1;
}

104/*
4a28fe22 105 * This code is called from the get_mplock() inline when the mplock
3933a3ab
MD
106 * is not already held. td_mpcount has already been predisposed
107 * (incremented).
4a28fe22
MD
108 */
109void
110_get_mplock_predisposed(const char *file, int line)
111{
112 globaldata_t gd = mycpu;
113
114 if (gd->gd_intr_nesting_level) {
115 panic("Attempt to acquire mplock not already held "
116 "in hard section, ipi or interrupt %s:%d",
117 file, line);
118 }
119 if (atomic_cmpset_int(&mp_lock, -1, gd->gd_cpuid) == 0)
120 _get_mplock_contested(file, line);
121#ifdef INVARIANTS
122 mp_lock_holder_file = file;
123 mp_lock_holder_line = line;
124#endif
125}
126
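/*
 * For illustration only: the actual get_mplock() inline lives in
 * sys/mplock2.h and is not reproduced here.  A minimal sketch of the
 * contract assumed above -- the inline predisposes td_mpcount first and
 * only drops into this file when the lock is not already owned -- might
 * look roughly like this (names and details may differ from the real
 * header):
 */
#if 0
static __inline void
get_mplock_sketch(const char *file, int line)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;

	++td->td_mpcount;		/* predispose the recursion count */
	if (mp_lock != gd->gd_cpuid)	/* fast path only if already owned */
		_get_mplock_predisposed(file, line);
}
#endif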
/*
 * Called when the MP lock could not be trivially acquired.  The caller
 * has already bumped td_mpcount.
 */
void
_get_mplock_contested(const char *file, int line)
{
	globaldata_t gd = mycpu;
	int ov;
	int nv;
	const void **stkframe = (const void **)&file;

	++mplock_contention_count;
	for (;;) {
		ov = mp_lock;
		nv = gd->gd_cpuid;
		if (ov == gd->gd_cpuid)
			break;
		if (ov == -1) {
			if (atomic_cmpset_int(&mp_lock, ov, gd->gd_cpuid))
				break;
		} else {
			gd->gd_curthread->td_mplock_stallpc = stkframe[-1];
			loggiant(beg);
			lwkt_switch();
			loggiant(end);
			KKASSERT(gd->gd_cpuid == mp_lock);
			break;
		}
	}
}

/*
 * Called if td_mpcount went negative or if td_mpcount + td_xpcount is 0
 * and we were unable to release the MP lock.  Handles sanity checks
 * and conflicts.
 *
 * It is possible for the inline release to have raced an interrupt which
 * got and released the MP lock, causing the inline's cmpset to fail.  If
 * this case occurs, mp_lock will either already be in a released state or
 * it will have already been acquired by another cpu.
 */
void
_rel_mplock_contested(void)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;
	int ov;

	KKASSERT(td->td_mpcount >= 0);
	if (td->td_mpcount + td->td_xpcount == 0) {
		for (;;) {
			ov = mp_lock;
			if (ov != gd->gd_cpuid)
				break;
			if (atomic_cmpset_int(&mp_lock, ov, -1))
				break;
		}
	}
}

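/*
 * For illustration only: a minimal sketch of the rel_mplock() inline
 * contract assumed above (the real inline is in sys/mplock2.h and may
 * differ in detail).  The inline drops td_mpcount and, on the transition
 * of the combined count to zero, tries to cmpset mp_lock back to -1; an
 * interrupt that got and released the lock in between can make that
 * cmpset fail, which is when _rel_mplock_contested() is called to sort
 * things out.
 */
#if 0
static __inline void
rel_mplock_sketch(void)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;

	if (td->td_mpcount + td->td_xpcount == 1) {
		--td->td_mpcount;
		if (atomic_cmpset_int(&mp_lock, gd->gd_cpuid, -1) == 0)
			_rel_mplock_contested();
	} else {
		--td->td_mpcount;
	}
}
#endif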
/*
 * Called when try_mplock() fails.
 *
 * The inline bumped td_mpcount so we have to undo it.
 *
 * It is possible to race an interrupt which acquired and released the
 * MP lock.  When combined with the td_mpcount decrement we do, the MP lock
 * can wind up in any state and possibly not even owned by us.
 *
 * It is also possible for this function to be called even if td_mpcount > 1
 * if someone bumped it and raced an interrupt which then called try_mplock().
 */
void
_try_mplock_contested(const char *file, int line)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;
	int ov;

	--td->td_mpcount;
	KKASSERT(td->td_mpcount >= 0);
	++mplock_contention_count;

	if (td->td_mpcount + td->td_xpcount == 0) {
		for (;;) {
			ov = mp_lock;
			if (ov != gd->gd_cpuid)
				break;
			if (atomic_cmpset_int(&mp_lock, ov, -1))
				break;
		}
	}
}

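/*
 * For illustration only: a typical caller-side pattern for try_mplock(),
 * assuming the inline returns non-zero on success and leaves no residual
 * state behind on failure (the real inline is in sys/mplock2.h):
 */
#if 0
static void
try_mplock_usage_sketch(void)
{
	if (try_mplock()) {
		/* MP-protected work goes here */
		rel_mplock();
	} else {
		/* could not get the lock without blocking; fall back */
	}
}
#endif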
/*
 * Called when cpu_try_mplock() fails.
 *
 * The inline did not touch td_mpcount so we do not either.
 */
void
_cpu_try_mplock_contested(const char *file, int line)
{
	++mplock_contention_count;
}

/*
 * Temporarily yield the MP lock.  This is part of lwkt_user_yield()
 * which is kinda hackish.  The MP lock cannot be yielded if inherited
 * due to a preemption.
 */
void
yield_mplock(thread_t td)
{
	int savecnt;

	if (td->td_xpcount == 0) {
		savecnt = td->td_mpcount;
		td->td_mpcount = 1;
		rel_mplock();
		DELAY(bgl_yield);
		get_mplock();
		td->td_mpcount = savecnt;
	}
}

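/*
 * For illustration only: a sketch of how a caller such as lwkt_user_yield()
 * might use yield_mplock() to briefly give other cpus a shot at the MP lock
 * when the current thread has been holding it.  This is an assumption about
 * the caller, not a copy of the real lwkt_user_yield().
 */
#if 0
static void
user_yield_sketch(void)
{
	thread_t td = curthread;

	/* only yield a lock we actually hold and did not inherit */
	if (td->td_mpcount != 0 && td->td_xpcount == 0)
		yield_mplock(td);
}
#endif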
#if 0

/*
 * The rel_mplock() code will call this function after releasing the
 * last reference on the MP lock if cpu_contention_mask is non-zero.
 *
 * We then chain an IPI to a single other cpu potentially needing the
 * lock.  This is a bit heuristic and we can wind up with IPIs flying
 * all over the place.
 */
static void lwkt_mp_lock_uncontested_remote(void *arg __unused);

void
lwkt_mp_lock_uncontested(void)
{
	globaldata_t gd;
	globaldata_t dgd;
	cpumask_t mask;
	cpumask_t tmpmask;
	int cpuid;

	if (chain_mplock) {
		gd = mycpu;
		clr_mplock_contention_mask(gd);
		mask = cpu_contention_mask;
		tmpmask = ~((1 << gd->gd_cpuid) - 1);

		if (mask) {
			if (mask & tmpmask)
				cpuid = bsfl(mask & tmpmask);
			else
				cpuid = bsfl(mask);
			atomic_clear_int(&cpu_contention_mask, 1 << cpuid);
			dgd = globaldata_find(cpuid);
			lwkt_send_ipiq(dgd, lwkt_mp_lock_uncontested_remote, NULL);
		}
	}
}

/*
 * The idea is for this IPI to interrupt a potentially lower priority
 * thread, such as a user thread, to allow the scheduler to reschedule
 * a higher priority kernel thread that needs the MP lock.
 *
 * For now we set the LWKT reschedule flag which generates an AST in
 * doreti, though theoretically it is also possible to preempt here
 * if the underlying thread was operating in user mode.  Nah.
 */
static void
lwkt_mp_lock_uncontested_remote(void *arg __unused)
{
	need_lwkt_resched();
}

#endif

#endif /* SMP */