From: Sepherosa Ziehau Date: Thu, 14 Apr 2011 05:10:55 +0000 (+0800) Subject: systimer: Fix statclock() intr% counting for i8254 interrupt cputimer X-Git-Url: https://gitweb.dragonflybsd.org/~mneumann/dragonfly.git/commitdiff_plain/96d52ac818036eee9591e612e829d78217bd2d3b systimer: Fix statclock() intr% counting for i8254 interrupt cputimer If the i8254 is used as the interrupt cputimer, it will send IPIs to the APs to get statclock() called. In order to detect fast interrupts, statclock() tests gd_intr_nesting_level to see whether it should bump intr% or not. However, during IPI processing, gd_intr_nesting_level is bumped up, which tricks statclock() into believing that intr% should be incremented. This usually leads to 100% interrupt time on the APs being reported by utilities like systat(1). To solve this problem: - Pass in_ipi to systimer functions to indicate whether the functions are called during IPI processing or not. - In statclock(): if it is called during IPI processing, use (gd_intr_nesting_level - 1) to test whether intr% should be incremented or not. 
Additional cleanup: - Make systimer_init_*() interface stricter, by using "systimer_func_t" instead of "void *" --- diff --git a/sys/cpu/i386/include/cpu.h b/sys/cpu/i386/include/cpu.h index 13505a911a..1ed4711abf 100644 --- a/sys/cpu/i386/include/cpu.h +++ b/sys/cpu/i386/include/cpu.h @@ -63,7 +63,7 @@ #define cpu_swapin(p) /* nothing */ #define cpu_setstack(lp, ap) ((lp)->lwp_md.md_regs[SP] = (ap)) -#define CLKF_INTR(framep) (mycpu->gd_intr_nesting_level > 1 || (curthread->td_flags & TDF_INTTHREAD)) +#define CLKF_INTR(intr_nest) ((intr_nest) > 1 || (curthread->td_flags & TDF_INTTHREAD)) #define CLKF_PC(framep) ((framep)->if_eip) /* diff --git a/sys/cpu/x86_64/include/cpu.h b/sys/cpu/x86_64/include/cpu.h index a7fd24c647..a9a440e7cb 100644 --- a/sys/cpu/x86_64/include/cpu.h +++ b/sys/cpu/x86_64/include/cpu.h @@ -63,7 +63,7 @@ #define cpu_swapin(p) /* nothing */ #define cpu_setstack(lp, ap) ((lp)->lwp_md.md_regs[SP] = (ap)) -#define CLKF_INTR(framep) (mycpu->gd_intr_nesting_level > 1 || (curthread->td_flags & TDF_INTTHREAD)) +#define CLKF_INTR(intr_nest) ((intr_nest) > 1 || (curthread->td_flags & TDF_INTTHREAD)) #define CLKF_PC(framep) ((framep)->if_rip) /* diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index eeba4d8d22..a2d6fa333e 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -234,9 +234,9 @@ SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, SYSCTL_PROC(_kern, OID_AUTO, basetime, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0, sysctl_get_basetime, "S,timespec", "System basetime"); -static void hardclock(systimer_t info, struct intrframe *frame); -static void statclock(systimer_t info, struct intrframe *frame); -static void schedclock(systimer_t info, struct intrframe *frame); +static void hardclock(systimer_t info, int, struct intrframe *frame); +static void statclock(systimer_t info, int, struct intrframe *frame); +static void schedclock(systimer_t info, int, struct intrframe *frame); static void getnanotime_nbt(struct timespec *nbt, 
struct timespec *tsp); int ticks; /* system master ticks at hz */ @@ -371,7 +371,7 @@ set_timeofday(struct timespec *ts) * manipulate objects owned by the current cpu. */ static void -hardclock(systimer_t info, struct intrframe *frame) +hardclock(systimer_t info, int in_ipi __unused, struct intrframe *frame) { sysclock_t cputicks; struct proc *p; @@ -597,7 +597,7 @@ hardclock(systimer_t info, struct intrframe *frame) * interrupted. */ static void -statclock(systimer_t info, struct intrframe *frame) +statclock(systimer_t info, int in_ipi, struct intrframe *frame) { #ifdef GPROF struct gmonparam *g; @@ -646,6 +646,16 @@ statclock(systimer_t info, struct intrframe *frame) else cpu_time.cp_user += bump; } else { + int intr_nest = mycpu->gd_intr_nesting_level; + + if (in_ipi) { + /* + * IPI processing code will bump gd_intr_nesting_level + * up by one, which breaks following CLKF_INTR testing, + * so we substract it by one here. + */ + --intr_nest; + } #ifdef GPROF /* * Kernel statistics are just like addupc_intr, only easier. @@ -674,12 +684,12 @@ statclock(systimer_t info, struct intrframe *frame) * XXX assume system if frame is NULL. A NULL frame * can occur if ipi processing is done from a crit_exit(). */ - if (frame && CLKF_INTR(frame)) + if (frame && CLKF_INTR(intr_nest)) td->td_iticks += bump; else td->td_sticks += bump; - if (frame && CLKF_INTR(frame)) { + if (frame && CLKF_INTR(intr_nest)) { #ifdef DEBUG_PCTRACK do_pctrack(frame, PCTRACK_INT); #endif @@ -754,7 +764,7 @@ SYSCTL_PROC(_kern, OID_AUTO, pctrack, (CTLTYPE_OPAQUE|CTLFLAG_RD), 0, 0, * Each cpu has its own scheduler clock. 
*/ static void -schedclock(systimer_t info, struct intrframe *frame) +schedclock(systimer_t info, int in_ipi __unused, struct intrframe *frame) { struct lwp *lp; struct rusage *ru; diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 7d9fa22500..8661c2b1ef 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -117,7 +117,7 @@ int max_installed_soft_intr; static int sysctl_emergency_freq(SYSCTL_HANDLER_ARGS); static int sysctl_emergency_enable(SYSCTL_HANDLER_ARGS); -static void emergency_intr_timer_callback(systimer_t, struct intrframe *); +static void emergency_intr_timer_callback(systimer_t, int, struct intrframe *); static void ithread_handler(void *arg); static void ithread_emergency(void *arg); static void report_stray_interrupt(int intr, struct intr_info *info); @@ -583,7 +583,8 @@ report_stray_interrupt(int intr, struct intr_info *info) * might not be held). */ static void -ithread_livelock_wakeup(systimer_t st) +ithread_livelock_wakeup(systimer_t st, int in_ipi __unused, + struct intrframe *frame __unused) { struct intr_info *info; @@ -987,7 +988,8 @@ ithread_emergency(void *arg __unused) */ static void -emergency_intr_timer_callback(systimer_t info, struct intrframe *frame __unused) +emergency_intr_timer_callback(systimer_t info, int in_ipi __unused, + struct intrframe *frame __unused) { if (emergency_intr_enable) lwkt_schedule(info->data); diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index c2b310c52a..ae00c5b8c2 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -180,7 +180,7 @@ static void poll_sysctl_burstmax(netmsg_t); static void poll_sysctl_eachburst(netmsg_t); /* Systimer handler */ -static void pollclock(systimer_t, struct intrframe *); +static void pollclock(systimer_t, int, struct intrframe *); /* Sysctl handlers */ static int sysctl_pollhz(SYSCTL_HANDLER_ARGS); @@ -462,7 +462,8 @@ sysctl_eachburst(SYSCTL_HANDLER_ARGS) * WARNING! called from fastint or IPI, the MP lock might not be held. 
*/ static void -pollclock(systimer_t info, struct intrframe *frame __unused) +pollclock(systimer_t info, int in_ipi __unused, + struct intrframe *frame __unused) { struct pollctx *pctx = info->data; struct timeval t; diff --git a/sys/kern/kern_systimer.c b/sys/kern/kern_systimer.c index 79a06791ec..2492b3f3d1 100644 --- a/sys/kern/kern_systimer.c +++ b/sys/kern/kern_systimer.c @@ -68,7 +68,7 @@ * hardclock, statclock, and other finely-timed routines. */ void -systimer_intr(sysclock_t *timep, int dummy, struct intrframe *frame) +systimer_intr(sysclock_t *timep, int in_ipi, struct intrframe *frame) { globaldata_t gd = mycpu; sysclock_t time = *timep; @@ -99,7 +99,7 @@ systimer_intr(sysclock_t *timep, int dummy, struct intrframe *frame) TAILQ_REMOVE(info->queue, info, node); gd->gd_systimer_inprog = info; crit_exit(); - info->func(info, frame); + info->func(info, in_ipi, frame); crit_enter(); /* @@ -236,7 +236,8 @@ systimer_del(systimer_t info) * the realtime clock. */ void -systimer_init_periodic(systimer_t info, void *func, void *data, int hz) +systimer_init_periodic(systimer_t info, systimer_func_t func, void *data, + int hz) { sysclock_t base_count; @@ -254,7 +255,8 @@ systimer_init_periodic(systimer_t info, void *func, void *data, int hz) } void -systimer_init_periodic_nq(systimer_t info, void *func, void *data, int hz) +systimer_init_periodic_nq(systimer_t info, systimer_func_t func, void *data, + int hz) { sysclock_t base_count; @@ -293,7 +295,7 @@ systimer_adjust_periodic(systimer_t info, int hz) * it to the system. The frequency is uncompensated and approximate. 
*/ void -systimer_init_oneshot(systimer_t info, void *func, void *data, int us) +systimer_init_oneshot(systimer_t info, systimer_func_t func, void *data, int us) { bzero(info, sizeof(struct systimer)); info->time = sys_cputimer->count() + sys_cputimer->fromus(us); @@ -303,4 +305,3 @@ systimer_init_oneshot(systimer_t info, void *func, void *data, int us) info->gd = mycpu; systimer_add(info); } - diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 65ea66d3d3..ae4d9383fc 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -284,7 +284,8 @@ sys_clock_getres(struct clock_getres_args *uap) * MPSAFE */ static void -ns1_systimer(systimer_t info) +ns1_systimer(systimer_t info, int in_ipi __unused, + struct intrframe *frame __unused) { lwkt_schedule(info->data); } diff --git a/sys/net/dummynet/ip_dummynet.c b/sys/net/dummynet/ip_dummynet.c index 5912120a4c..e9ad5437a5 100644 --- a/sys/net/dummynet/ip_dummynet.c +++ b/sys/net/dummynet/ip_dummynet.c @@ -186,7 +186,7 @@ static void ready_event_wfq(struct dn_pipe *); static int config_pipe(struct dn_ioc_pipe *); static void dummynet_flush(void); -static void dummynet_clock(systimer_t, struct intrframe *); +static void dummynet_clock(systimer_t, int, struct intrframe *); static void dummynet(netmsg_t); static struct dn_pipe *dn_find_pipe(int); @@ -1868,7 +1868,8 @@ dummynet_ctl(struct dn_sopt *dn_sopt) } static void -dummynet_clock(systimer_t info __unused, struct intrframe *frame __unused) +dummynet_clock(systimer_t info __unused, int in_ipi __unused, + struct intrframe *frame __unused) { KASSERT(mycpuid == ip_dn_cpu, ("dummynet systimer comes on cpu%d, should be %d!\n", diff --git a/sys/net/if_poll.c b/sys/net/if_poll.c index 407259fde7..dcaa210f96 100644 --- a/sys/net/if_poll.c +++ b/sys/net/if_poll.c @@ -239,8 +239,8 @@ static int sysctl_eachburst(SYSCTL_HANDLER_ARGS); static void poll_comm_init(int); static void poll_comm_start(int); static void poll_comm_adjust_pollhz(struct poll_comm *); -static void 
poll_comm_systimer0(systimer_t, struct intrframe *); -static void poll_comm_systimer(systimer_t, struct intrframe *); +static void poll_comm_systimer0(systimer_t, int, struct intrframe *); +static void poll_comm_systimer(systimer_t, int, struct intrframe *); static void sysctl_pollhz_handler(netmsg_t); static void sysctl_stfrac_handler(netmsg_t); static void sysctl_txfrac_handler(netmsg_t); @@ -1172,7 +1172,7 @@ static void poll_comm_start(int cpuid) { struct poll_comm *comm = poll_common[cpuid]; - void (*func)(systimer_t, struct intrframe *); + systimer_func_t func; /* * Initialize systimer @@ -1195,7 +1195,8 @@ _poll_comm_systimer(struct poll_comm *comm) } static void -poll_comm_systimer0(systimer_t info, struct intrframe *frame __unused) +poll_comm_systimer0(systimer_t info, int in_ipi __unused, + struct intrframe *frame __unused) { struct poll_comm *comm = info->data; globaldata_t gd = mycpu; @@ -1214,7 +1215,8 @@ poll_comm_systimer0(systimer_t info, struct intrframe *frame __unused) } static void -poll_comm_systimer(systimer_t info, struct intrframe *frame __unused) +poll_comm_systimer(systimer_t info, int in_ipi __unused, + struct intrframe *frame __unused) { struct poll_comm *comm = info->data; globaldata_t gd = mycpu; diff --git a/sys/platform/pc32/isa/clock.c b/sys/platform/pc32/isa/clock.c index abf5aebdad..f85e1e400f 100644 --- a/sys/platform/pc32/isa/clock.c +++ b/sys/platform/pc32/isa/clock.c @@ -212,7 +212,7 @@ clkintr(void *dummy, void *frame_arg) continue; if (gscan != gd) { lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, - &sysclock_count, 0); + &sysclock_count, 1); } else { systimer_intr(&sysclock_count, 0, frame_arg); } diff --git a/sys/platform/pc64/isa/clock.c b/sys/platform/pc64/isa/clock.c index ded5030533..03008184f8 100644 --- a/sys/platform/pc64/isa/clock.c +++ b/sys/platform/pc64/isa/clock.c @@ -214,7 +214,7 @@ clkintr(void *dummy, void *frame_arg) continue; if (gscan != gd) { lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, - 
&sysclock_count, 0); + &sysclock_count, 1); } else { systimer_intr(&sysclock_count, 0, frame_arg); } diff --git a/sys/sys/systimer.h b/sys/sys/systimer.h index 43e43e6fab..9c3bc1699c 100644 --- a/sys/sys/systimer.h +++ b/sys/sys/systimer.h @@ -58,15 +58,14 @@ struct intrframe; typedef __uint32_t sysclock_t; typedef TAILQ_HEAD(systimerq, systimer) *systimerq_t; -typedef void (*systimer_func_t)(struct systimer *); -typedef void (*systimer_func2_t)(struct systimer *, struct intrframe *); +typedef void (*systimer_func_t)(struct systimer *, int, struct intrframe *); typedef struct systimer { TAILQ_ENTRY(systimer) node; systimerq_t queue; sysclock_t time; /* absolute time next intr */ sysclock_t periodic; /* if non-zero */ - systimer_func2_t func; + systimer_func_t func; void *data; int flags; int freq; /* frequency if periodic */ @@ -82,10 +81,10 @@ void systimer_intr_enable(void); void systimer_intr(sysclock_t *, int, struct intrframe *); void systimer_add(systimer_t); void systimer_del(systimer_t); -void systimer_init_periodic(systimer_t, void *, void *, int); -void systimer_init_periodic_nq(systimer_t, void *, void *, int); +void systimer_init_periodic(systimer_t, systimer_func_t, void *, int); +void systimer_init_periodic_nq(systimer_t, systimer_func_t, void *, int); void systimer_adjust_periodic(systimer_t, int); -void systimer_init_oneshot(systimer_t, void *, void *, int); +void systimer_init_oneshot(systimer_t, systimer_func_t, void *, int); /* * cputimer interface. This provides a free-running (non-interrupt) @@ -127,7 +126,7 @@ extern struct cputimer *sys_cputimer; #define CPUTIMER_VKERNEL 4 #define CPUTIMER_HPET 5 #define CPUTIMER_GEODE 6 -#define CPUTIMER_CS5536 7 +#define CPUTIMER_CS5536 7 #define CPUTIMER_PRI_DUMMY -10 #define CPUTIMER_PRI_8254 0