| 1 | /*- |
| 2 | * Copyright (c) 1982, 1986, 1993 |
| 3 | * The Regents of the University of California. All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * 3. All advertising materials mentioning features or use of this software |
| 14 | * must display the following acknowledgement: |
| 15 | * This product includes software developed by the University of |
| 16 | * California, Berkeley and its contributors. |
| 17 | * 4. Neither the name of the University nor the names of its contributors |
| 18 | * may be used to endorse or promote products derived from this software |
| 19 | * without specific prior written permission. |
| 20 | * |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 31 | * SUCH DAMAGE. |
| 32 | * |
| 33 | * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93 |
| 34 | * $FreeBSD: src/sys/kern/subr_prof.c,v 1.32.2.2 2000/08/03 00:09:32 ps Exp $ |
| 35 | * $DragonFly: src/sys/kern/subr_prof.c,v 1.14 2006/12/23 00:35:04 swildner Exp $ |
| 36 | */ |
| 37 | |
| 38 | #include <sys/param.h> |
| 39 | #include <sys/systm.h> |
| 40 | #include <sys/sysproto.h> |
| 41 | #include <sys/kernel.h> |
| 42 | #include <sys/proc.h> |
| 43 | #include <sys/resourcevar.h> |
| 44 | #include <sys/sysctl.h> |
| 45 | #include <sys/thread2.h> |
| 46 | |
| 47 | #include <machine/cpu.h> |
| 48 | |
| 49 | #ifdef GPROF |
| 50 | #include <sys/malloc.h> |
| 51 | #include <sys/gmon.h> |
| 52 | #undef MCOUNT |
| 53 | |
| 54 | static MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer"); |
| 55 | |
| 56 | static void kmstartup (void *); |
| 57 | SYSINIT(kmem, SI_SUB_KPROF, SI_ORDER_FIRST, kmstartup, NULL) |
| 58 | |
| 59 | struct gmonparam _gmonparam = { GMON_PROF_OFF }; |
| 60 | |
| 61 | #ifdef GUPROF |
| 62 | #include <machine/asmacros.h> |
| 63 | |
| 64 | void |
| 65 | nullfunc_loop_profiled() |
| 66 | { |
| 67 | int i; |
| 68 | |
| 69 | for (i = 0; i < CALIB_SCALE; i++) |
| 70 | nullfunc_profiled(); |
| 71 | } |
| 72 | |
/*
 * XXX: kmstartup() walks addresses from nullfunc_loop_profiled up to this
 * symbol, so this alias assumes nullfunc_profiled() is placed directly
 * after nullfunc_loop_profiled() in the kernel text.
 */
#define	nullfunc_loop_profiled_end	nullfunc_profiled	/* XXX */

/*
 * Empty function; nullfunc_loop_profiled() calls it repeatedly so that the
 * cost of a profiled call/return pair can be measured.
 */
void
nullfunc_profiled(void)
{
}
| 79 | #endif /* GUPROF */ |
| 80 | |
| 81 | static void |
| 82 | kmstartup(void *dummy) |
| 83 | { |
| 84 | char *cp; |
| 85 | struct gmonparam *p = &_gmonparam; |
| 86 | #ifdef GUPROF |
| 87 | int cputime_overhead; |
| 88 | int empty_loop_time; |
| 89 | int i; |
| 90 | int mcount_overhead; |
| 91 | int mexitcount_overhead; |
| 92 | int nullfunc_loop_overhead; |
| 93 | int nullfunc_loop_profiled_time; |
| 94 | uintfptr_t tmp_addr; |
| 95 | #endif |
| 96 | |
| 97 | /* |
| 98 | * Round lowpc and highpc to multiples of the density we're using |
| 99 | * so the rest of the scaling (here and in gprof) stays in ints. |
| 100 | */ |
| 101 | p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER)); |
| 102 | p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER)); |
| 103 | p->textsize = p->highpc - p->lowpc; |
| 104 | kprintf("Profiling kernel, textsize=%lu [%x..%x]\n", |
| 105 | p->textsize, p->lowpc, p->highpc); |
| 106 | p->kcountsize = p->textsize / HISTFRACTION; |
| 107 | p->hashfraction = HASHFRACTION; |
| 108 | p->fromssize = p->textsize / HASHFRACTION; |
| 109 | p->tolimit = p->textsize * ARCDENSITY / 100; |
| 110 | if (p->tolimit < MINARCS) |
| 111 | p->tolimit = MINARCS; |
| 112 | else if (p->tolimit > MAXARCS) |
| 113 | p->tolimit = MAXARCS; |
| 114 | p->tossize = p->tolimit * sizeof(struct tostruct); |
| 115 | cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize, |
| 116 | M_GPROF, M_NOWAIT); |
| 117 | if (cp == 0) { |
| 118 | kprintf("No memory for profiling.\n"); |
| 119 | return; |
| 120 | } |
| 121 | bzero(cp, p->kcountsize + p->tossize + p->fromssize); |
| 122 | p->tos = (struct tostruct *)cp; |
| 123 | cp += p->tossize; |
| 124 | p->kcount = (HISTCOUNTER *)cp; |
| 125 | cp += p->kcountsize; |
| 126 | p->froms = (u_short *)cp; |
| 127 | |
| 128 | #ifdef GUPROF |
| 129 | /* Initialize pointers to overhead counters. */ |
| 130 | p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime)); |
| 131 | p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount)); |
| 132 | p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount)); |
| 133 | |
| 134 | /* |
| 135 | * Disable interrupts to avoid interference while we calibrate |
| 136 | * things. |
| 137 | */ |
| 138 | cpu_disable_intr(); |
| 139 | |
| 140 | /* |
| 141 | * Determine overheads. |
| 142 | * XXX this needs to be repeated for each useful timer/counter. |
| 143 | */ |
| 144 | cputime_overhead = 0; |
| 145 | startguprof(p); |
| 146 | for (i = 0; i < CALIB_SCALE; i++) |
| 147 | cputime_overhead += cputime(); |
| 148 | |
| 149 | empty_loop(); |
| 150 | startguprof(p); |
| 151 | empty_loop(); |
| 152 | empty_loop_time = cputime(); |
| 153 | |
| 154 | nullfunc_loop_profiled(); |
| 155 | |
| 156 | /* |
| 157 | * Start profiling. There won't be any normal function calls since |
| 158 | * interrupts are disabled, but we will call the profiling routines |
| 159 | * directly to determine their overheads. |
| 160 | */ |
| 161 | p->state = GMON_PROF_HIRES; |
| 162 | |
| 163 | startguprof(p); |
| 164 | nullfunc_loop_profiled(); |
| 165 | |
| 166 | startguprof(p); |
| 167 | for (i = 0; i < CALIB_SCALE; i++) |
| 168 | #if defined(__i386__) && __GNUC__ >= 2 |
| 169 | __asm("pushl %0; call __mcount; popl %%ecx" |
| 170 | : |
| 171 | : "i" (profil) |
| 172 | : "ax", "bx", "cx", "dx", "memory"); |
| 173 | #else |
| 174 | #error |
| 175 | #endif |
| 176 | mcount_overhead = KCOUNT(p, PC_TO_I(p, profil)); |
| 177 | |
| 178 | startguprof(p); |
| 179 | for (i = 0; i < CALIB_SCALE; i++) |
| 180 | #if defined(__i386__) && __GNUC__ >= 2 |
| 181 | __asm("call " __XSTRING(HIDENAME(mexitcount)) "; 1:" |
| 182 | : : : "ax", "bx", "cx", "dx", "memory"); |
| 183 | __asm("movl $1b,%0" : "=rm" (tmp_addr)); |
| 184 | #else |
| 185 | #error |
| 186 | #endif |
| 187 | mexitcount_overhead = KCOUNT(p, PC_TO_I(p, tmp_addr)); |
| 188 | |
| 189 | p->state = GMON_PROF_OFF; |
| 190 | stopguprof(p); |
| 191 | |
| 192 | cpu_enable_intr(); |
| 193 | |
| 194 | nullfunc_loop_profiled_time = 0; |
| 195 | for (tmp_addr = (uintfptr_t)nullfunc_loop_profiled; |
| 196 | tmp_addr < (uintfptr_t)nullfunc_loop_profiled_end; |
| 197 | tmp_addr += HISTFRACTION * sizeof(HISTCOUNTER)) |
| 198 | nullfunc_loop_profiled_time += KCOUNT(p, PC_TO_I(p, tmp_addr)); |
| 199 | #define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE) |
| 200 | #define c2n(count, freq) ((int)((count) * 1000000000LL / freq)) |
| 201 | kprintf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n", |
| 202 | CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)), |
| 203 | CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)), |
| 204 | CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)), |
| 205 | CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)), |
| 206 | CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate))); |
| 207 | cputime_overhead -= empty_loop_time; |
| 208 | mcount_overhead -= empty_loop_time; |
| 209 | mexitcount_overhead -= empty_loop_time; |
| 210 | |
| 211 | /*- |
| 212 | * Profiling overheads are determined by the times between the |
| 213 | * following events: |
| 214 | * MC1: mcount() is called |
| 215 | * MC2: cputime() (called from mcount()) latches the timer |
| 216 | * MC3: mcount() completes |
| 217 | * ME1: mexitcount() is called |
| 218 | * ME2: cputime() (called from mexitcount()) latches the timer |
| 219 | * ME3: mexitcount() completes. |
| 220 | * The times between the events vary slightly depending on instruction |
| 221 | * combination and cache misses, etc. Attempt to determine the |
| 222 | * minimum times. These can be subtracted from the profiling times |
| 223 | * without much risk of reducing the profiling times below what they |
| 224 | * would be when profiling is not configured. Abbreviate: |
| 225 | * ab = minimum time between MC1 and MC3 |
| 226 | * a = minumum time between MC1 and MC2 |
| 227 | * b = minimum time between MC2 and MC3 |
| 228 | * cd = minimum time between ME1 and ME3 |
| 229 | * c = minimum time between ME1 and ME2 |
| 230 | * d = minimum time between ME2 and ME3. |
| 231 | * These satisfy the relations: |
| 232 | * ab <= mcount_overhead (just measured) |
| 233 | * a + b <= ab |
| 234 | * cd <= mexitcount_overhead (just measured) |
| 235 | * c + d <= cd |
| 236 | * a + d <= nullfunc_loop_profiled_time (just measured) |
| 237 | * a >= 0, b >= 0, c >= 0, d >= 0. |
| 238 | * Assume that ab and cd are equal to the minimums. |
| 239 | */ |
| 240 | p->cputime_overhead = CALIB_DOSCALE(cputime_overhead); |
| 241 | p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead); |
| 242 | p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead |
| 243 | - cputime_overhead); |
| 244 | nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time; |
| 245 | p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead |
| 246 | - nullfunc_loop_overhead) |
| 247 | / 4); |
| 248 | p->mexitcount_pre_overhead = p->mexitcount_overhead |
| 249 | + p->cputime_overhead |
| 250 | - p->mexitcount_post_overhead; |
| 251 | p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead) |
| 252 | - p->mexitcount_post_overhead; |
| 253 | p->mcount_post_overhead = p->mcount_overhead |
| 254 | + p->cputime_overhead |
| 255 | - p->mcount_pre_overhead; |
| 256 | kprintf( |
| 257 | "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n", |
| 258 | c2n(p->cputime_overhead, p->profrate), |
| 259 | c2n(p->mcount_overhead, p->profrate), |
| 260 | c2n(p->mcount_pre_overhead, p->profrate), |
| 261 | c2n(p->mcount_post_overhead, p->profrate), |
| 262 | c2n(p->cputime_overhead, p->profrate), |
| 263 | c2n(p->mexitcount_overhead, p->profrate), |
| 264 | c2n(p->mexitcount_pre_overhead, p->profrate), |
| 265 | c2n(p->mexitcount_post_overhead, p->profrate)); |
| 266 | kprintf( |
| 267 | "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n", |
| 268 | p->cputime_overhead, p->mcount_overhead, |
| 269 | p->mcount_pre_overhead, p->mcount_post_overhead, |
| 270 | p->cputime_overhead, p->mexitcount_overhead, |
| 271 | p->mexitcount_pre_overhead, p->mexitcount_post_overhead); |
| 272 | #endif /* GUPROF */ |
| 273 | } |
| 274 | |
| 275 | /* |
| 276 | * Return kernel profiling information. |
| 277 | */ |
| 278 | static int |
| 279 | sysctl_kern_prof(SYSCTL_HANDLER_ARGS) |
| 280 | { |
| 281 | int *name = (int *) arg1; |
| 282 | u_int namelen = arg2; |
| 283 | struct gmonparam *gp = &_gmonparam; |
| 284 | int error; |
| 285 | int state; |
| 286 | |
| 287 | /* all sysctl names at this level are terminal */ |
| 288 | if (namelen != 1) |
| 289 | return (ENOTDIR); /* overloaded */ |
| 290 | |
| 291 | switch (name[0]) { |
| 292 | case GPROF_STATE: |
| 293 | state = gp->state; |
| 294 | error = sysctl_handle_int(oidp, &state, 0, req); |
| 295 | if (error) |
| 296 | return (error); |
| 297 | if (!req->newptr) |
| 298 | return (0); |
| 299 | if (state == GMON_PROF_OFF) { |
| 300 | gp->state = state; |
| 301 | stopprofclock(&proc0); |
| 302 | stopguprof(gp); |
| 303 | } else if (state == GMON_PROF_ON) { |
| 304 | gp->state = GMON_PROF_OFF; |
| 305 | stopguprof(gp); |
| 306 | gp->profrate = profhz; |
| 307 | startprofclock(&proc0); |
| 308 | gp->state = state; |
| 309 | #ifdef GUPROF |
| 310 | } else if (state == GMON_PROF_HIRES) { |
| 311 | gp->state = GMON_PROF_OFF; |
| 312 | stopprofclock(&proc0); |
| 313 | startguprof(gp); |
| 314 | gp->state = state; |
| 315 | #endif |
| 316 | } else if (state != gp->state) |
| 317 | return (EINVAL); |
| 318 | return (0); |
| 319 | case GPROF_COUNT: |
| 320 | return (sysctl_handle_opaque(oidp, |
| 321 | gp->kcount, gp->kcountsize, req)); |
| 322 | case GPROF_FROMS: |
| 323 | return (sysctl_handle_opaque(oidp, |
| 324 | gp->froms, gp->fromssize, req)); |
| 325 | case GPROF_TOS: |
| 326 | return (sysctl_handle_opaque(oidp, |
| 327 | gp->tos, gp->tossize, req)); |
| 328 | case GPROF_GMONPARAM: |
| 329 | return (sysctl_handle_opaque(oidp, gp, sizeof *gp, req)); |
| 330 | default: |
| 331 | return (EOPNOTSUPP); |
| 332 | } |
| 333 | /* NOTREACHED */ |
| 334 | } |
| 335 | |
| 336 | SYSCTL_NODE(_kern, KERN_PROF, prof, CTLFLAG_RW, sysctl_kern_prof, ""); |
| 337 | #endif /* GPROF */ |
| 338 | |
| 339 | /* |
| 340 | * Profiling system call. |
| 341 | * |
| 342 | * The scale factor is a fixed point number with 16 bits of fraction, so that |
| 343 | * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling. |
| 344 | */ |
| 345 | /* ARGSUSED */ |
| 346 | int |
| 347 | sys_profil(struct profil_args *uap) |
| 348 | { |
| 349 | struct proc *p = curproc; |
| 350 | struct uprof *upp; |
| 351 | |
| 352 | if (uap->scale > (1 << 16)) |
| 353 | return (EINVAL); |
| 354 | if (uap->scale == 0) { |
| 355 | stopprofclock(p); |
| 356 | return (0); |
| 357 | } |
| 358 | upp = &p->p_prof; |
| 359 | |
| 360 | /* Block profile interrupts while changing state. */ |
| 361 | crit_enter(); |
| 362 | upp->pr_off = uap->offset; |
| 363 | upp->pr_scale = uap->scale; |
| 364 | upp->pr_base = uap->samples; |
| 365 | upp->pr_size = uap->size; |
| 366 | startprofclock(p); |
| 367 | crit_exit(); |
| 368 | |
| 369 | return (0); |
| 370 | } |
| 371 | |
/*
 * Map a pc to a byte offset into the profiling sample buffer.
 *
 * pr_scale is a fixed-point number with the binary point 16 bits into the
 * value, and is <= 1.0.  pc is at most 32 bits, so the intermediate
 * product is at most 48 bits and is computed as u_quad_t.  The low bit of
 * the result is cleared, so the offset is always even.
 */
#define	PC_TO_INDEX(pc, prof)						\
	(~1 & (int)(((u_quad_t)(prof)->pr_scale *			\
	    (u_quad_t)((pc) - (prof)->pr_off)) >> 16))
| 380 | |
| 381 | /* |
| 382 | * Collect user-level profiling statistics; called on a profiling tick, |
| 383 | * when a process is running in user-mode. This routine may be called |
| 384 | * from an interrupt context. We try to update the user profiling buffers |
| 385 | * cheaply with fuswintr() and suswintr(). If that fails, we revert to |
| 386 | * an AST that will vector us to trap() with a context in which copyin |
| 387 | * and copyout will work. Trap will then call addupc_task(). |
| 388 | * |
| 389 | * Note that we may (rarely) not get around to the AST soon enough, and |
| 390 | * lose profile ticks when the next tick overwrites this one, but in this |
| 391 | * case the system is overloaded and the profile is probably already |
| 392 | * inaccurate. |
| 393 | */ |
| 394 | void |
| 395 | addupc_intr(struct proc *p, u_long pc, u_int ticks) |
| 396 | { |
| 397 | struct uprof *prof; |
| 398 | caddr_t addr; |
| 399 | u_int i; |
| 400 | int v; |
| 401 | |
| 402 | if (ticks == 0) |
| 403 | return; |
| 404 | prof = &p->p_prof; |
| 405 | if (pc < prof->pr_off || |
| 406 | (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) |
| 407 | return; /* out of range; ignore */ |
| 408 | |
| 409 | addr = prof->pr_base + i; |
| 410 | if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) { |
| 411 | prof->pr_addr = pc; |
| 412 | prof->pr_ticks = ticks; |
| 413 | need_proftick(); |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | /* |
| 418 | * Much like before, but we can afford to take faults here. If the |
| 419 | * update fails, we simply turn off profiling. |
| 420 | */ |
| 421 | void |
| 422 | addupc_task(struct proc *p, u_long pc, u_int ticks) |
| 423 | { |
| 424 | struct uprof *prof; |
| 425 | caddr_t addr; |
| 426 | u_int i; |
| 427 | u_short v; |
| 428 | |
| 429 | /* Testing P_PROFIL may be unnecessary, but is certainly safe. */ |
| 430 | if ((p->p_flag & P_PROFIL) == 0 || ticks == 0) |
| 431 | return; |
| 432 | |
| 433 | prof = &p->p_prof; |
| 434 | if (pc < prof->pr_off || |
| 435 | (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) |
| 436 | return; |
| 437 | |
| 438 | addr = prof->pr_base + i; |
| 439 | if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) { |
| 440 | v += ticks; |
| 441 | if (copyout((caddr_t)&v, addr, sizeof(v)) == 0) |
| 442 | return; |
| 443 | } |
| 444 | stopprofclock(p); |
| 445 | } |