Merge from vendor branch SENDMAIL:
[dragonfly.git] / sys / kern / kern_intr.c
CommitLineData
984263bc 1/*
033a4603 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
ef0fdad1 3 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved.
984263bc
MD
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $
044ee7c4 27 * $DragonFly: src/sys/kern/kern_intr.c,v 1.20 2005/06/01 17:43:42 dillon Exp $
984263bc
MD
28 *
29 */
30
984263bc
MD
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/malloc.h>
34#include <sys/kernel.h>
35#include <sys/sysctl.h>
ef0fdad1
MD
36#include <sys/thread.h>
37#include <sys/proc.h>
38#include <sys/thread2.h>
7e071e7a 39#include <sys/random.h>
984263bc
MD
40
41#include <machine/ipl.h>
42
43#include <sys/interrupt.h>
44
ef0fdad1
MD
45typedef struct intrec {
46 struct intrec *next;
47 inthand2_t *handler;
45d76888 48 intrmask_t *maskptr; /* LEGACY */
ef0fdad1
MD
49 void *argument;
50 const char *name;
51 int intr;
52} intrec_t;
53
/*
 * Per-interrupt state.  Every array is indexed by interrupt number; the
 * registration code treats [NHWI, NHWI + NSWI) as software interrupts.
 */
static intrec_t *intlists[NHWI+NSWI];	/* chain of registered handlers */
static thread_t ithreads[NHWI+NSWI];	/* interrupt thread, NULL until created */
static struct thread ithread_ary[NHWI+NSWI]; /* storage passed to lwkt_create() */
static struct random_softc irandom_ary[NHWI+NSWI]; /* entropy harvesting state */
static int irunning[NHWI+NSWI];		/* set by sched_ithd(), cleared by the thread */
static u_int ill_count[NHWI+NSWI];	/* interrupt livelock counter */
static u_int ill_ticks[NHWI+NSWI];	/* cputimer count at the previous pass */
static u_int ill_delta[NHWI+NSWI];	/* ticks accumulated in the current sample */
static int ill_state[NHWI+NSWI];	/* current state */
static struct systimer ill_timer[NHWI+NSWI];	/* enforced freq. timer */
static struct systimer ill_rtimer[NHWI+NSWI];	/* recovery timer */
static intrmask_t dummy_intr_mask;	/* used when a caller passes a NULL maskptr */
37d44089
MD
66
67#define LIVELOCK_NONE 0
68#define LIVELOCK_LIMITED 1
69
93781523
MD
70static int livelock_limit = 50000;
71static int livelock_fallback = 20000;
37d44089
MD
72SYSCTL_INT(_kern, OID_AUTO, livelock_limit,
73 CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit");
74SYSCTL_INT(_kern, OID_AUTO, livelock_fallback,
75 CTLFLAG_RW, &livelock_fallback, 0, "Livelock interrupt fallback rate");
984263bc 76
45d76888
MD
/*
 * TEMPORARY sysctl to allow interrupt handlers to run without the critical
 * section (if set to 0).
 *
 * SEQUENCE OF EVENTS: default to prior operation, testing, change default
 * to 0, lots more testing, then make operation without a critical section
 * mandatory and remove the sysctl code and variable.
 *
 * ithread_handler() samples the value once at the top of each loop pass.
 */
static int int_use_crit_section = 1;
SYSCTL_INT(_kern, OID_AUTO, int_use_crit_section,
    CTLFLAG_RW, &int_use_crit_section, 0, "Run interrupts entirely within a critical section");

/* Main loop of an interrupt thread; handed to lwkt_create() by register_int(). */
static void ithread_handler(void *arg);
984263bc 90
45d76888
MD
91/*
92 * Register an SWI or INTerrupt handler.
93 *
94 * Note that maskptr exists to support legacy spl handling and is not intended
95 * to be permanent (because spls are not compatible with BGL removal).
96 */
96728c05 97thread_t
45d76888
MD
98register_swi(int intr, inthand2_t *handler, void *arg, const char *name,
99 intrmask_t *maskptr)
984263bc 100{
ef0fdad1
MD
101 if (intr < NHWI || intr >= NHWI + NSWI)
102 panic("register_swi: bad intr %d", intr);
45d76888 103 return(register_int(intr, handler, arg, name, maskptr));
984263bc
MD
104}
105
96728c05 106thread_t
45d76888
MD
107register_int(int intr, inthand2_t *handler, void *arg, const char *name,
108 intrmask_t *maskptr)
984263bc 109{
ef0fdad1
MD
110 intrec_t **list;
111 intrec_t *rec;
112 thread_t td;
113
93781523 114 if (intr < 0 || intr >= NHWI + NSWI)
ef0fdad1 115 panic("register_int: bad intr %d", intr);
45d76888
MD
116 if (maskptr == NULL)
117 maskptr = &dummy_intr_mask;
ef0fdad1
MD
118
119 rec = malloc(sizeof(intrec_t), M_DEVBUF, M_NOWAIT);
120 if (rec == NULL)
121 panic("register_swi: malloc failed");
122 rec->handler = handler;
45d76888 123 rec->maskptr = maskptr;
ef0fdad1
MD
124 rec->argument = arg;
125 rec->name = name;
126 rec->intr = intr;
127 rec->next = NULL;
128
129 list = &intlists[intr];
130
131 /*
132 * Create an interrupt thread if necessary, leave it in an unscheduled
45d76888 133 * state.
ef0fdad1
MD
134 */
135 if ((td = ithreads[intr]) == NULL) {
136 lwkt_create((void *)ithread_handler, (void *)intr, &ithreads[intr],
75cdbe6c
MD
137 &ithread_ary[intr], TDF_STOPREQ|TDF_INTTHREAD, -1,
138 "ithread %d", intr);
ef0fdad1 139 td = ithreads[intr];
4b5f931b 140 if (intr >= NHWI && intr < NHWI + NSWI)
45d76888 141 lwkt_setpri(td, TDPRI_SOFT_NORM);
4b5f931b 142 else
45d76888 143 lwkt_setpri(td, TDPRI_INT_MED);
ef0fdad1
MD
144 }
145
146 /*
147 * Add the record to the interrupt list
148 */
149 crit_enter(); /* token */
150 while (*list != NULL)
151 list = &(*list)->next;
152 *list = rec;
153 crit_exit();
96728c05 154 return(td);
ef0fdad1 155}
984263bc 156
ef0fdad1
MD
157void
158unregister_swi(int intr, inthand2_t *handler)
159{
160 if (intr < NHWI || intr >= NHWI + NSWI)
161 panic("register_swi: bad intr %d", intr);
162 unregister_int(intr, handler);
984263bc
MD
163}
164
165void
ef0fdad1 166unregister_int(int intr, inthand2_t handler)
984263bc 167{
ef0fdad1
MD
168 intrec_t **list;
169 intrec_t *rec;
170
171 if (intr < 0 || intr > NHWI + NSWI)
172 panic("register_int: bad intr %d", intr);
173 list = &intlists[intr];
174 crit_enter();
175 while ((rec = *list) != NULL) {
176 if (rec->handler == (void *)handler) {
177 *list = rec->next;
178 break;
984263bc 179 }
ef0fdad1
MD
180 list = &rec->next;
181 }
182 crit_exit();
183 if (rec != NULL) {
184 free(rec, M_DEVBUF);
185 } else {
186 printf("warning: unregister_int: int %d handler %p not found\n",
187 intr, handler);
188 }
984263bc
MD
189}
190
4b5f931b
MD
191void
192swi_setpriority(int intr, int pri)
193{
194 struct thread *td;
195
196 if (intr < NHWI || intr >= NHWI + NSWI)
197 panic("register_swi: bad intr %d", intr);
198 if ((td = ithreads[intr]) != NULL)
199 lwkt_setpri(td, pri);
200}
201
7e071e7a
MD
202void
203register_randintr(int intr)
204{
205 struct random_softc *sc = &irandom_ary[intr];
206 sc->sc_intr = intr;
207 sc->sc_enabled = 1;
208}
209
210void
211unregister_randintr(int intr)
212{
213 struct random_softc *sc = &irandom_ary[intr];
214 sc->sc_enabled = 0;
215}
216
ef0fdad1 217/*
b68b7282
MD
218 * Dispatch an interrupt. If there's nothing to do we have a stray
219 * interrupt and can just return, leaving the interrupt masked.
96728c05
MD
220 *
221 * We need to schedule the interrupt and set its irunning[] bit. If
222 * we are not on the interrupt thread's cpu we have to send a message
223 * to the correct cpu that will issue the desired action (interlocking
224 * with the interrupt thread's critical section).
225 *
226 * We are NOT in a critical section, which will allow the scheduled
71ef2f5c 227 * interrupt to preempt us. The MP lock might *NOT* be held here.
ef0fdad1 228 */
96728c05
MD
/*
 * IPI callback used by sched_ithd() when the interrupt thread belongs to
 * another cpu: the interrupt number travels in 'arg' and sched_ithd() is
 * simply re-run with it.
 */
static void
sched_ithd_remote(void *arg)
{
    int intr = (int)arg;

    sched_ithd(intr);
}
234
ef0fdad1
MD
235void
236sched_ithd(int intr)
237{
238 thread_t td;
239
240 if ((td = ithreads[intr]) != NULL) {
b68b7282 241 if (intlists[intr] == NULL) {
ef0fdad1 242 printf("sched_ithd: stray interrupt %d\n", intr);
b68b7282 243 } else {
a72187e9 244 if (td->td_gd == mycpu) {
96728c05
MD
245 irunning[intr] = 1;
246 lwkt_schedule(td); /* preemption handled internally */
247 } else {
2db3b277 248 lwkt_send_ipiq(td->td_gd, sched_ithd_remote, (void *)intr);
96728c05 249 }
b68b7282 250 }
ef0fdad1
MD
251 } else {
252 printf("sched_ithd: stray interrupt %d\n", intr);
253 }
254}
255
37d44089
MD
256/*
257 * This is run from a periodic SYSTIMER (and thus must be MP safe, the BGL
258 * might not be held).
259 */
260static void
93781523 261ithread_livelock_wakeup(systimer_t info)
37d44089 262{
93781523 263 int intr = (int)info->data;
37d44089
MD
264 thread_t td;
265
266 if ((td = ithreads[intr]) != NULL)
267 lwkt_schedule(td);
268}
269
270
b68b7282 271/*
45d76888
MD
272 * Interrupt threads run this as their main loop.
273 *
274 * The handler begins execution outside a critical section and with the BGL
275 * held.
37d44089
MD
276 *
277 * The irunning state starts at 0. When an interrupt occurs, the hardware
278 * interrupt is disabled and sched_ithd() The HW interrupt remains disabled
279 * until all routines have run. We then call ithread_done() to reenable
45d76888
MD
280 * the HW interrupt and deschedule us until the next interrupt.
281 *
282 * We are responsible for atomically checking irunning[] and ithread_done()
283 * is responsible for atomically checking for platform-specific delayed
284 * interrupts. irunning[] for our irq is only set in the context of our cpu,
285 * so a critical section is a sufficient interlock.
b68b7282 286 */
93781523
MD
287#define LIVELOCK_TIMEFRAME(freq) ((freq) >> 2) /* 1/4 second */
288
ef0fdad1
MD
289static void
290ithread_handler(void *arg)
291{
292 int intr = (int)arg;
37d44089 293 int freq;
93781523
MD
294 u_int bticks;
295 u_int cputicks;
ef0fdad1
MD
296 intrec_t **list = &intlists[intr];
297 intrec_t *rec;
298 intrec_t *nrec;
7e071e7a 299 struct random_softc *sc = &irandom_ary[intr];
45d76888
MD
300 globaldata_t gd = mycpu;
301 int in_crit_section; /* REMOVE WHEN TESTING COMPLETE */
302 intrmask_t s;
303
304 /*
305 * The loop must be entered with one critical section held.
306 */
307 crit_enter_gd(gd);
ef0fdad1 308
ef0fdad1 309 for (;;) {
45d76888
MD
310 /*
311 * Deal with the sysctl variable allowing the interrupt thread to run
312 * without a critical section. Once this is proven out it will
313 * become the default. Note that a critical section is always
314 * held as of the top of the loop.
315 */
316 in_crit_section = int_use_crit_section;
317 if (in_crit_section == 0)
318 crit_exit_gd(gd);
319
93781523
MD
320 /*
321 * We can get woken up by the livelock periodic code too, run the
45d76888
MD
322 * handlers only if there is a real interrupt pending. XXX
323 *
324 * Clear irunning[] prior to running the handlers to interlock
325 * again new events occuring during processing of existing events.
93781523 326 */
a474df86
MD
327 irunning[intr] = 0;
328 for (rec = *list; rec; rec = nrec) {
329 nrec = rec->next;
45d76888 330 s = splq(*rec->maskptr);
a474df86 331 rec->handler(rec->argument);
45d76888 332 splx(s);
ef0fdad1 333 }
37d44089
MD
334
335 /*
336 * This is our interrupt hook to add rate randomness to the random
337 * number generator.
338 */
7e071e7a 339 if (sc->sc_enabled)
96728c05 340 add_interrupt_randomness(intr);
37d44089
MD
341
342 /*
343 * This is our livelock test. If we hit the rate limit we
45d76888 344 * limit ourselves to X interrupts/sec until the rate
37d44089 345 * falls below 50% of that value, then we unlimit again.
45d76888
MD
346 *
347 * XXX calling cputimer_count() is expensive but a livelock may
348 * prevent other interrupts from occuring so we cannot use ticks.
37d44089 349 */
044ee7c4 350 cputicks = sys_cputimer->count();
37d44089 351 ++ill_count[intr];
93781523
MD
352 bticks = cputicks - ill_ticks[intr];
353 ill_ticks[intr] = cputicks;
044ee7c4
MD
354 if (bticks > sys_cputimer->freq)
355 bticks = sys_cputimer->freq;
37d44089
MD
356
357 switch(ill_state[intr]) {
358 case LIVELOCK_NONE:
359 ill_delta[intr] += bticks;
044ee7c4 360 if (ill_delta[intr] < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
37d44089 361 break;
044ee7c4
MD
362 freq = (int64_t)ill_count[intr] * sys_cputimer->freq /
363 ill_delta[intr];
37d44089
MD
364 ill_delta[intr] = 0;
365 ill_count[intr] = 0;
366 if (freq < livelock_limit)
367 break;
368 printf("intr %d at %d hz, livelocked! limiting at %d hz\n",
369 intr, freq, livelock_fallback);
370 ill_state[intr] = LIVELOCK_LIMITED;
371 bticks = 0;
93781523
MD
372 /* force periodic check to avoid stale removal (if ints stop) */
373 systimer_init_periodic(&ill_rtimer[intr], ithread_livelock_wakeup,
374 (void *)intr, 1);
37d44089
MD
375 /* fall through */
376 case LIVELOCK_LIMITED:
377 /*
378 * Delay (us) before rearming the interrupt
379 */
380 systimer_init_oneshot(&ill_timer[intr], ithread_livelock_wakeup,
381 (void *)intr, 1 + 1000000 / livelock_fallback);
382 lwkt_deschedule_self(curthread);
383 lwkt_switch();
93781523
MD
384
385 /* in case we were woken up by something else */
37d44089
MD
386 systimer_del(&ill_timer[intr]);
387
388 /*
389 * Calculate interrupt rate (note that due to our delay it
390 * will not exceed livelock_fallback).
391 */
392 ill_delta[intr] += bticks;
044ee7c4 393 if (ill_delta[intr] < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
37d44089 394 break;
044ee7c4
MD
395 freq = (int64_t)ill_count[intr] * sys_cputimer->freq /
396 ill_delta[intr];
37d44089
MD
397 ill_delta[intr] = 0;
398 ill_count[intr] = 0;
399 if (freq < (livelock_fallback >> 1)) {
400 printf("intr %d at %d hz, removing livelock limit\n",
401 intr, freq);
402 ill_state[intr] = LIVELOCK_NONE;
93781523 403 systimer_del(&ill_rtimer[intr]);
37d44089
MD
404 }
405 break;
406 }
407
408 /*
45d76888
MD
409 * There are two races here. irunning[] is set by sched_ithd()
410 * in the context of our cpu and is critical-section safe. We
411 * are responsible for checking it. ipending is not critical
412 * section safe and must be handled by the platform specific
413 * ithread_done() routine.
37d44089 414 */
45d76888
MD
415 if (in_crit_section) {
416 if (irunning[intr] == 0)
417 ithread_done(intr);
418 } else {
419 crit_enter_gd(gd);
420 if (irunning[intr] == 0)
421 ithread_done(intr);
422 }
423 /* must be in critical section on loop */
ef0fdad1 424 }
ef0fdad1
MD
425}
426
984263bc
MD
427/*
428 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
429 * The data for this machine dependent, and the declarations are in machine
430 * dependent code. The layout of intrnames and intrcnt however is machine
431 * independent.
432 *
433 * We do not know the length of intrcnt and intrnames at compile time, so
434 * calculate things at run time.
435 */
436static int
437sysctl_intrnames(SYSCTL_HANDLER_ARGS)
438{
439 return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames,
440 req));
441}
442
443SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
444 NULL, 0, sysctl_intrnames, "", "Interrupt Names");
445
446static int
447sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
448{
449 return (sysctl_handle_opaque(oidp, intrcnt,
450 (char *)eintrcnt - (char *)intrcnt, req));
451}
452
453SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
454 NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");