Move the polling systimer initialization code out of kern_clock.c and into
[dragonfly.git] / sys / kern / kern_intr.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $
27 * $DragonFly: src/sys/kern/kern_intr.c,v 1.24 2005/10/13 00:02:22 dillon Exp $
28 *
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/malloc.h>
34#include <sys/kernel.h>
35#include <sys/sysctl.h>
36#include <sys/thread.h>
37#include <sys/proc.h>
38#include <sys/thread2.h>
39#include <sys/random.h>
40#include <sys/serialize.h>
41#include <sys/bus.h>
42
43#include <machine/ipl.h>
44#include <machine/frame.h>
45
46#include <sys/interrupt.h>
47
/*
 * One registered interrupt handler.  Records for the same interrupt number
 * are chained through 'next' off intr_info.i_reclist.  The pointer handed
 * back by register_int() is a pointer to one of these records.
 */
48typedef struct intrec {
/* next record registered on the same interrupt, NULL at the tail */
49 struct intrec *next;
/* service routine, invoked as handler(argument, frame-or-NULL) */
50 inthand2_t *handler;
/* opaque argument passed as the handler's first parameter */
51 void *argument;
/* private malloc'd (M_DEVBUF) copy of the name given at registration */
52 char *name;
/* interrupt number this record is registered on */
53 int intr;
/* registration flags; INTR_FAST and INTR_MPSAFE are tested in this file */
54 int intr_flags;
/* optional; when non-NULL the handler is run via lwkt_serialize_handler_*() */
55 struct lwkt_serialize *serializer;
56} *intrec_t;
57
/*
 * Per-interrupt state.  intr_info_ary is indexed by interrupt number and
 * has NHWI + NSWI slots; software interrupts occupy [NHWI, NHWI + NSWI)
 * (see the range check in register_swi()).
 */
58struct intr_info {
/* head of the list of handler records registered on this interrupt */
59 intrec_t i_reclist;
/* the interrupt thread, created by the first register_int() on this slot */
60 struct thread i_thread;
/* entropy-harvesting state, toggled by [un]register_randintr() */
61 struct random_softc i_random;
/* set to 1 by sched_ithd(), cleared by ithread_handler() before each pass */
62 int i_running;
/* interrupt counter, exported through the hw.intrcnt sysctl */
63 long i_count;
/* number of registered records with INTR_FAST set */
64 int i_fast;
/* number of registered records without INTR_FAST */
65 int i_slow;
/* non-zero once i_thread has been created */
66 int i_valid_thread;
67} intr_info_ary[NHWI + NSWI];
68
/* Number of elements in intr_info_ary (NHWI + NSWI). */
69int intr_info_size = sizeof(intr_info_ary) / sizeof(intr_info_ary[0]);
70
/* Values taken by ithread_handler()'s ill_state livelock state variable. */
71#define LIVELOCK_NONE 0
72#define LIVELOCK_LIMITED 1
73
/*
 * Livelock tunables, writable at run time as kern.livelock_limit and
 * kern.livelock_fallback.  ithread_handler() enters the limited state when
 * the measured rate reaches livelock_limit, delays each pass by
 * 1 + 1000000 / livelock_fallback microseconds while limited, and leaves
 * the limited state once the rate drops below half of livelock_fallback.
 */
74static int livelock_limit = 50000;
75static int livelock_fallback = 20000;
76SYSCTL_INT(_kern, OID_AUTO, livelock_limit,
77 CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit");
78SYSCTL_INT(_kern, OID_AUTO, livelock_fallback,
79 CTLFLAG_RW, &livelock_fallback, 0, "Livelock interrupt fallback rate");
80
/* Main loop of every interrupt thread; needed by register_int() below. */
81static void ithread_handler(void *arg);
82
83/*
84 * Register an SWI or INTerrupt handler.
85 */
86void *
87register_swi(int intr, inthand2_t *handler, void *arg, const char *name,
88 struct lwkt_serialize *serializer)
89{
90 if (intr < NHWI || intr >= NHWI + NSWI)
91 panic("register_swi: bad intr %d", intr);
92 return(register_int(intr, handler, arg, name, serializer, 0));
93}
94
95void *
96register_int(int intr, inthand2_t *handler, void *arg, const char *name,
97 struct lwkt_serialize *serializer, int intr_flags)
98{
99 struct intr_info *info;
100 struct intrec **list;
101 intrec_t rec;
102
103 if (intr < 0 || intr >= NHWI + NSWI)
104 panic("register_int: bad intr %d", intr);
105 if (name == NULL)
106 name = "???";
107 info = &intr_info_ary[intr];
108
109 rec = malloc(sizeof(struct intrec), M_DEVBUF, M_INTWAIT);
110 rec->name = malloc(strlen(name) + 1, M_DEVBUF, M_INTWAIT);
111 strcpy(rec->name, name);
112
113 rec->handler = handler;
114 rec->argument = arg;
115 rec->intr = intr;
116 rec->intr_flags = intr_flags;
117 rec->next = NULL;
118 rec->serializer = serializer;
119
120 list = &info->i_reclist;
121
122 /*
123 * Keep track of how many fast and slow interrupts we have.
124 */
125 if (intr_flags & INTR_FAST)
126 ++info->i_fast;
127 else
128 ++info->i_slow;
129
130 /*
131 * Create an interrupt thread if necessary, leave it in an unscheduled
132 * state.
133 */
134 if (info->i_valid_thread == 0) {
135 info->i_valid_thread = 1;
136 lwkt_create((void *)ithread_handler, (void *)intr, NULL,
137 &info->i_thread, TDF_STOPREQ|TDF_INTTHREAD, -1,
138 "ithread %d", intr);
139 if (intr >= NHWI && intr < NHWI + NSWI)
140 lwkt_setpri(&info->i_thread, TDPRI_SOFT_NORM);
141 else
142 lwkt_setpri(&info->i_thread, TDPRI_INT_MED);
143 info->i_thread.td_preemptable = lwkt_preempt;
144 }
145
146 /*
147 * Add the record to the interrupt list
148 */
149 crit_enter(); /* token */
150 while (*list != NULL)
151 list = &(*list)->next;
152 *list = rec;
153 crit_exit();
154 return(rec);
155}
156
/*
 * Remove a software interrupt handler.  Software and hardware handlers
 * share one implementation, so this simply forwards the cookie to
 * unregister_int() and returns its result.
 */
int
unregister_swi(void *id)
{
    int remaining;

    remaining = unregister_int(id);
    return(remaining);
}
162
163int
164unregister_int(void *id)
165{
166 struct intr_info *info;
167 struct intrec **list;
168 intrec_t rec;
169 int intr;
170
171 intr = ((intrec_t)id)->intr;
172
173 if (intr < 0 || intr > NHWI + NSWI)
174 panic("register_int: bad intr %d", intr);
175
176 info = &intr_info_ary[intr];
177
178 /*
179 * Remove the interrupt descriptor
180 */
181 crit_enter();
182 list = &info->i_reclist;
183 while ((rec = *list) != NULL) {
184 if (rec == id) {
185 *list = rec->next;
186 break;
187 }
188 list = &rec->next;
189 }
190 crit_exit();
191
192 /*
193 * Free it, adjust interrupt type counts
194 */
195 if (rec != NULL) {
196 if (rec->intr_flags & INTR_FAST)
197 --info->i_fast;
198 else
199 --info->i_slow;
200 free(rec->name, M_DEVBUF);
201 free(rec, M_DEVBUF);
202 } else {
203 printf("warning: unregister_int: int %d handler for %s not found\n",
204 intr, ((intrec_t)id)->name);
205 }
206
207 /*
208 * Return the number of interrupt vectors still registered on this intr
209 */
210 return(info->i_fast + info->i_slow);
211}
212
213int
214get_registered_intr(void *id)
215{
216 return(((intrec_t)id)->intr);
217}
218
219const char *
220get_registered_name(int intr)
221{
222 intrec_t rec;
223
224 if (intr < 0 || intr > NHWI + NSWI)
225 panic("register_int: bad intr %d", intr);
226
227 if ((rec = intr_info_ary[intr].i_reclist) == NULL)
228 return(NULL);
229 else if (rec->next)
230 return("mux");
231 else
232 return(rec->name);
233}
234
235int
236count_registered_ints(int intr)
237{
238 struct intr_info *info;
239
240 if (intr < 0 || intr > NHWI + NSWI)
241 panic("register_int: bad intr %d", intr);
242 info = &intr_info_ary[intr];
243 return(info->i_fast + info->i_slow);
244}
245
246long
247get_interrupt_counter(int intr)
248{
249 struct intr_info *info;
250
251 if (intr < 0 || intr > NHWI + NSWI)
252 panic("register_int: bad intr %d", intr);
253 info = &intr_info_ary[intr];
254 return(info->i_count);
255}
256
257
258void
259swi_setpriority(int intr, int pri)
260{
261 struct intr_info *info;
262
263 if (intr < NHWI || intr >= NHWI + NSWI)
264 panic("register_swi: bad intr %d", intr);
265 info = &intr_info_ary[intr];
266 if (info->i_valid_thread)
267 lwkt_setpri(&info->i_thread, pri);
268}
269
270void
271register_randintr(int intr)
272{
273 struct intr_info *info;
274
275 if (intr < NHWI || intr >= NHWI + NSWI)
276 panic("register_swi: bad intr %d", intr);
277 info = &intr_info_ary[intr];
278 info->i_random.sc_intr = intr;
279 info->i_random.sc_enabled = 1;
280}
281
282void
283unregister_randintr(int intr)
284{
285 struct intr_info *info;
286
287 if (intr < NHWI || intr >= NHWI + NSWI)
288 panic("register_swi: bad intr %d", intr);
289 info = &intr_info_ary[intr];
290 info->i_random.sc_enabled = 0;
291}
292
293/*
294 * Dispatch an interrupt. If there's nothing to do we have a stray
295 * interrupt and can just return, leaving the interrupt masked.
296 *
297 * We need to schedule the interrupt and set its i_running bit. If
298 * we are not on the interrupt thread's cpu we have to send a message
299 * to the correct cpu that will issue the desired action (interlocking
300 * with the interrupt thread's critical section).
301 *
302 * We are NOT in a critical section, which will allow the scheduled
303 * interrupt to preempt us. The MP lock might *NOT* be held here.
304 */
/*
 * IPI callback used by sched_ithd() when the interrupt thread belongs to
 * another cpu.  The interrupt number travels in the pointer argument; it
 * is unpacked here and handed back to sched_ithd().
 */
static void
sched_ithd_remote(void *arg)
{
    int intr = (int)arg;

    sched_ithd(intr);
}
310
311void
312sched_ithd(int intr)
313{
314 struct intr_info *info;
315
316 info = &intr_info_ary[intr];
317
318 ++info->i_count;
319 if (info->i_valid_thread) {
320 if (info->i_reclist == NULL) {
321 printf("sched_ithd: stray interrupt %d\n", intr);
322 } else {
323 if (info->i_thread.td_gd == mycpu) {
324 info->i_running = 1;
325 /* preemption handled internally */
326 lwkt_schedule(&info->i_thread);
327 } else {
328 lwkt_send_ipiq(info->i_thread.td_gd,
329 sched_ithd_remote, (void *)intr);
330 }
331 }
332 } else {
333 printf("sched_ithd: stray interrupt %d\n", intr);
334 }
335}
336
337/*
338 * This is run from a periodic SYSTIMER (and thus must be MP safe, the BGL
339 * might not be held).
340 */
341static void
342ithread_livelock_wakeup(systimer_t st)
343{
344 struct intr_info *info;
345
346 info = &intr_info_ary[(int)st->data];
347 if (info->i_valid_thread)
348 lwkt_schedule(&info->i_thread);
349}
350
351/*
352 * This function is called drectly from the ICU or APIC vector code assembly
353 * to process an interrupt. The critical section and interrupt deferral
354 * checks have already been done but the function is entered WITHOUT
355 * a critical section held. The BGL may or may not be held.
356 *
357 * Must return non-zero if we do not want the vector code to re-enable
358 * the interrupt (which we don't if we have to schedule the interrupt)
359 */
360int ithread_fast_handler(struct intrframe frame);
361
362int
363ithread_fast_handler(struct intrframe frame)
364{
365 int intr;
366 struct intr_info *info;
367 struct intrec **list;
368 int must_schedule;
369#ifdef SMP
370 int got_mplock;
371#endif
372 intrec_t rec, next_rec;
373 globaldata_t gd;
374
375 intr = frame.if_vec;
376 gd = mycpu;
377
378 info = &intr_info_ary[intr];
379
380 /*
381 * If we are not processing any FAST interrupts, just schedule the thing.
382 * (since we aren't in a critical section, this can result in a
383 * preemption)
384 */
385 if (info->i_fast == 0) {
386 sched_ithd(intr);
387 return(1);
388 }
389
390 /*
391 * This should not normally occur since interrupts ought to be
392 * masked if the ithread has been scheduled or is running.
393 */
394 if (info->i_running)
395 return(1);
396
397 /*
398 * Bump the interrupt nesting level to process any FAST interrupts.
399 * Obtain the MP lock as necessary. If the MP lock cannot be obtained,
400 * schedule the interrupt thread to deal with the issue instead.
401 *
402 * To reduce overhead, just leave the MP lock held once it has been
403 * obtained.
404 */
405 crit_enter_gd(gd);
406 ++gd->gd_intr_nesting_level;
407 ++gd->gd_cnt.v_intr;
408 must_schedule = info->i_slow;
409#ifdef SMP
410 got_mplock = 0;
411#endif
412
413 list = &info->i_reclist;
414 for (rec = *list; rec; rec = next_rec) {
415 next_rec = rec->next; /* rec may be invalid after call */
416
417 if (rec->intr_flags & INTR_FAST) {
418#ifdef SMP
419 if ((rec->intr_flags & INTR_MPSAFE) == 0 && got_mplock == 0) {
420 if (try_mplock() == 0) {
421 /*
422 * XXX forward to the cpu holding the MP lock
423 */
424 must_schedule = 1;
425 break;
426 }
427 got_mplock = 1;
428 }
429#endif
430 if (rec->serializer) {
431 must_schedule += lwkt_serialize_handler_try(
432 rec->serializer, rec->handler,
433 rec->argument, &frame);
434 } else {
435 rec->handler(rec->argument, &frame);
436 }
437 }
438 }
439
440 /*
441 * Cleanup
442 */
443 --gd->gd_intr_nesting_level;
444#ifdef SMP
445 if (got_mplock)
446 rel_mplock();
447#endif
448 crit_exit_gd(gd);
449
450 /*
451 * If we had a problem, schedule the thread to catch the missed
452 * records (it will just re-run all of them). A return value of 0
453 * indicates that all handlers have been run and the interrupt can
454 * be re-enabled, and a non-zero return indicates that the interrupt
455 * thread controls re-enablement.
456 */
457 if (must_schedule)
458 sched_ithd(intr);
459 else
460 ++info->i_count;
461 return(must_schedule);
462}
463
464#if 0
465
4666: ; \
467 /* could not get the MP lock, forward the interrupt */ \
468 movl mp_lock, %eax ; /* check race */ \
469 cmpl $MP_FREE_LOCK,%eax ; \
470 je 2b ; \
471 incl PCPU(cnt)+V_FORWARDED_INTS ; \
472 subl $12,%esp ; \
473 movl $irq_num,8(%esp) ; \
474 movl $forward_fastint_remote,4(%esp) ; \
475 movl %eax,(%esp) ; \
476 call lwkt_send_ipiq_bycpu ; \
477 addl $12,%esp ; \
478 jmp 5f ;
479
480#endif
481
482
483/*
484 * Interrupt threads run this as their main loop.
485 *
486 * The handler begins execution outside a critical section and with the BGL
487 * held.
488 *
489 * The i_running state starts at 0. When an interrupt occurs, the hardware
490 * interrupt is disabled and sched_ithd() The HW interrupt remains disabled
491 * until all routines have run. We then call ithread_done() to reenable
492 * the HW interrupt and deschedule us until the next interrupt.
493 *
494 * We are responsible for atomically checking i_running and ithread_done()
495 * is responsible for atomically checking for platform-specific delayed
496 * interrupts. i_running for our irq is only set in the context of our cpu,
497 * so a critical section is a sufficient interlock.
498 */
499#define LIVELOCK_TIMEFRAME(freq) ((freq) >> 2) /* 1/4 second */
500
501static void
502ithread_handler(void *arg)
503{
504 struct intr_info *info;
505 u_int cputicks;
506 u_int bticks;
507 int intr;
508 int freq;
509 struct intrec **list;
510 intrec_t rec, nrec;
511 globaldata_t gd = mycpu;
512 struct systimer ill_timer; /* enforced freq. timer */
513 struct systimer ill_rtimer; /* recovery timer */
514 u_int ill_count = 0; /* interrupt livelock counter */
515 u_int ill_ticks = 0; /* track elapsed to calculate freq */
516 u_int ill_delta = 0; /* track elapsed to calculate freq */
517 int ill_state = 0; /* current state */
518
519 intr = (int)arg;
520 info = &intr_info_ary[intr];
521 list = &info->i_reclist;
522 gd = mycpu;
523
524 /*
525 * The loop must be entered with one critical section held.
526 */
527 crit_enter_gd(gd);
528
529 for (;;) {
530 /*
531 * We can get woken up by the livelock periodic code too, run the
532 * handlers only if there is a real interrupt pending. XXX
533 *
534 * Clear i_running prior to running the handlers to interlock
535 * again new events occuring during processing of existing events.
536 *
537 * Run each handler in a critical section. Note that we run both
538 * FAST and SLOW designated service routines.
539 */
540 info->i_running = 0;
541 for (rec = *list; rec; rec = nrec) {
542 nrec = rec->next;
543 if (rec->serializer) {
544 lwkt_serialize_handler_call(rec->serializer,
545 rec->handler, rec->argument, NULL);
546 } else {
547 rec->handler(rec->argument, NULL);
548 }
549 }
550
551 /*
552 * Do a quick exit/enter to catch any higher-priority
553 * interrupt sources and so user/system/interrupt statistics
554 * work for interrupt threads.
555 */
556 crit_exit_gd(gd);
557 crit_enter_gd(gd);
558
559 /*
560 * This is our interrupt hook to add rate randomness to the random
561 * number generator.
562 */
563 if (info->i_random.sc_enabled)
564 add_interrupt_randomness(intr);
565
566 /*
567 * This is our livelock test. If we hit the rate limit we
568 * limit ourselves to X interrupts/sec until the rate
569 * falls below 50% of that value, then we unlimit again.
570 *
571 * XXX calling cputimer_count() is expensive but a livelock may
572 * prevent other interrupts from occuring so we cannot use ticks.
573 */
574 cputicks = sys_cputimer->count();
575 ++ill_count;
576 bticks = cputicks - ill_ticks;
577 ill_ticks = cputicks;
578 if (bticks > sys_cputimer->freq)
579 bticks = sys_cputimer->freq;
580
581 switch(ill_state) {
582 case LIVELOCK_NONE:
583 ill_delta += bticks;
584 if (ill_delta < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
585 break;
586 freq = (int64_t)ill_count * sys_cputimer->freq /
587 ill_delta;
588 ill_delta = 0;
589 ill_count = 0;
590 if (freq < livelock_limit)
591 break;
592 printf("intr %d at %d hz, livelocked! limiting at %d hz\n",
593 intr, freq, livelock_fallback);
594 ill_state = LIVELOCK_LIMITED;
595 bticks = 0;
596 /* force periodic check to avoid stale removal (if ints stop) */
597 systimer_init_periodic(&ill_rtimer, ithread_livelock_wakeup,
598 (void *)intr, 1);
599 /* fall through */
600 case LIVELOCK_LIMITED:
601 /*
602 * Delay (us) before rearming the interrupt
603 */
604 systimer_init_oneshot(&ill_timer, ithread_livelock_wakeup,
605 (void *)intr, 1 + 1000000 / livelock_fallback);
606 lwkt_deschedule_self(curthread);
607 lwkt_switch();
608
609 /* in case we were woken up by something else */
610 systimer_del(&ill_timer);
611
612 /*
613 * Calculate interrupt rate (note that due to our delay it
614 * will not exceed livelock_fallback).
615 */
616 ill_delta += bticks;
617 if (ill_delta < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
618 break;
619 freq = (int64_t)ill_count * sys_cputimer->freq / ill_delta;
620 ill_delta = 0;
621 ill_count = 0;
622 if (freq < (livelock_fallback >> 1)) {
623 printf("intr %d at %d hz, removing livelock limit\n",
624 intr, freq);
625 ill_state = LIVELOCK_NONE;
626 systimer_del(&ill_rtimer);
627 }
628 break;
629 }
630
631 /*
632 * There are two races here. i_running is set by sched_ithd()
633 * in the context of our cpu and is critical-section safe. We
634 * are responsible for checking it. ipending is not critical
635 * section safe and must be handled by the platform specific
636 * ithread_done() routine.
637 */
638 if (info->i_running == 0)
639 ithread_done(intr);
640 /* must be in critical section on loop */
641 }
642 /* not reached */
643}
644
645/*
646 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
647 * The data for this is machine dependent, and the declarations are in machine
648 * dependent code. The layout of intrnames and intrcnt however is machine
649 * independent.
650 *
651 * We do not know the length of intrcnt and intrnames at compile time, so
652 * calculate things at run time.
653 */
654
655static int
656sysctl_intrnames(SYSCTL_HANDLER_ARGS)
657{
658 struct intr_info *info;
659 intrec_t rec;
660 int error = 0;
661 int len;
662 int intr;
663 char buf[64];
664
665 for (intr = 0; error == 0 && intr < NHWI + NSWI; ++intr) {
666 info = &intr_info_ary[intr];
667
668 len = 0;
669 buf[0] = 0;
670 for (rec = info->i_reclist; rec; rec = rec->next) {
671 snprintf(buf + len, sizeof(buf) - len, "%s%s",
672 (len ? "/" : ""), rec->name);
673 len += strlen(buf + len);
674 }
675 if (len == 0) {
676 snprintf(buf, sizeof(buf), "irq%d", intr);
677 len = strlen(buf);
678 }
679 error = SYSCTL_OUT(req, buf, len + 1);
680 }
681 return (error);
682}
683
684
685SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
686 NULL, 0, sysctl_intrnames, "", "Interrupt Names");
687
688static int
689sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
690{
691 struct intr_info *info;
692 int error = 0;
693 int intr;
694
695 for (intr = 0; intr < NHWI + NSWI; ++intr) {
696 info = &intr_info_ary[intr];
697
698 error = SYSCTL_OUT(req, &info->i_count, sizeof(info->i_count));
699 if (error)
700 break;
701 }
702 return(error);
703}
704
705SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
706 NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
707