| Commit | Line | Data |
|---|---|---|
| 8c10bfcf MD |
1 | /* |
| 2 | * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. | |
| 3 | * | |
| 4 | * This code is derived from software contributed to The DragonFly Project | |
| 5 | * by Matthew Dillon <dillon@backplane.com> | |
| 6 | * | |
| 7 | * Redistribution and use in source and binary forms, with or without | |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| 10 | * | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in | |
| 15 | * the documentation and/or other materials provided with the | |
| 16 | * distribution. | |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 18 | * contributors may be used to endorse or promote products derived | |
| 19 | * from this software without specific, prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| 33 | * | |
| 984263bc MD |
34 | * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org> |
| 35 | * Copyright (c) 1982, 1986, 1991, 1993 | |
| 36 | * The Regents of the University of California. All rights reserved. | |
| 37 | * (c) UNIX System Laboratories, Inc. | |
| 38 | * All or some portions of this file are derived from material licensed | |
| 39 | * to the University of California by American Telephone and Telegraph | |
| 40 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
| 41 | * the permission of UNIX System Laboratories, Inc. | |
| 42 | * | |
| 43 | * Redistribution and use in source and binary forms, with or without | |
| 44 | * modification, are permitted provided that the following conditions | |
| 45 | * are met: | |
| 46 | * 1. Redistributions of source code must retain the above copyright | |
| 47 | * notice, this list of conditions and the following disclaimer. | |
| 48 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 49 | * notice, this list of conditions and the following disclaimer in the | |
| 50 | * documentation and/or other materials provided with the distribution. | |
| 51 | * 3. All advertising materials mentioning features or use of this software | |
| 52 | * must display the following acknowledgement: | |
| 53 | * This product includes software developed by the University of | |
| 54 | * California, Berkeley and its contributors. | |
| 55 | * 4. Neither the name of the University nor the names of its contributors | |
| 56 | * may be used to endorse or promote products derived from this software | |
| 57 | * without specific prior written permission. | |
| 58 | * | |
| 59 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 60 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 61 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 62 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 63 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 64 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 65 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 66 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 67 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 68 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 69 | * SUCH DAMAGE. | |
| 70 | * | |
| 71 | * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 | |
| 72 | * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $ | |
| c730be20 | 73 | * $DragonFly: src/sys/kern/kern_clock.c,v 1.62 2008/09/09 04:06:13 dillon Exp $ |
| 984263bc MD |
74 | */ |
| 75 | ||
| 76 | #include "opt_ntp.h" | |
| 2b71c8f1 | 77 | #include "opt_polling.h" |
| b3a7093f | 78 | #include "opt_ifpoll.h" |
| 07522099 | 79 | #include "opt_pctrack.h" |
| 984263bc MD |
80 | |
| 81 | #include <sys/param.h> | |
| 82 | #include <sys/systm.h> | |
| 984263bc MD |
83 | #include <sys/callout.h> |
| 84 | #include <sys/kernel.h> | |
| f5d21610 | 85 | #include <sys/kinfo.h> |
| 984263bc MD |
86 | #include <sys/proc.h> |
| 87 | #include <sys/malloc.h> | |
| 88 | #include <sys/resourcevar.h> | |
| 89 | #include <sys/signalvar.h> | |
| 90 | #include <sys/timex.h> | |
| 91 | #include <sys/timepps.h> | |
| 92 | #include <vm/vm.h> | |
| 93 | #include <sys/lock.h> | |
| 94 | #include <vm/pmap.h> | |
| 95 | #include <vm/vm_map.h> | |
| 5ffd1608 | 96 | #include <vm/vm_extern.h> |
| 984263bc | 97 | #include <sys/sysctl.h> |
| 2689779e | 98 | #include <sys/thread2.h> |
| 984263bc MD |
99 | |
| 100 | #include <machine/cpu.h> | |
| 101 | #include <machine/limits.h> | |
| 102 | #include <machine/smp.h> | |
| 103 | ||
| 104 | #ifdef GPROF | |
| 105 | #include <sys/gmon.h> | |
| 106 | #endif | |
| 107 | ||
| 108 | #ifdef DEVICE_POLLING | |
| 94ebffcd | 109 | extern void init_device_poll_pcpu(int); |
| 3e61f60e | 110 | #endif |
| 984263bc | 111 | |
| b3a7093f SZ |
112 | #ifdef IFPOLL_ENABLE |
| 113 | extern void ifpoll_init_pcpu(int); | |
| 114 | #endif | |
| 115 | ||
| 07522099 MD |
116 | #ifdef DEBUG_PCTRACK |
| 117 | static void do_pctrack(struct intrframe *frame, int which); | |
| 118 | #endif | |
| 119 | ||
| 402ed7e1 | 120 | static void initclocks (void *dummy); |
| ba39e2e0 | 121 | SYSINIT(clocks, SI_BOOT2_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) |
| 984263bc | 122 | |
| 6ad39cae MD |
123 | /* |
| 124 | * Some of these don't belong here, but it's easiest to concentrate them. | |
| 9eea7f0c | 125 | * Note that cpu_time counts in microseconds, but most userland programs |
| 6ad39cae MD |
126 | * just compare relative times against the total by delta. |
| 127 | */ | |
| 9eea7f0c | 128 | struct kinfo_cputime cputime_percpu[MAXCPU]; |
| 07522099 MD |
129 | #ifdef DEBUG_PCTRACK |
| 130 | struct kinfo_pcheader cputime_pcheader = { PCTRACK_SIZE, PCTRACK_ARYSIZE }; | |
| 131 | struct kinfo_pctrack cputime_pctrack[MAXCPU][PCTRACK_SIZE]; | |
| 132 | #endif | |
| 133 | ||
| 9eea7f0c HP |
134 | #ifdef SMP |
| 135 | static int | |
| 136 | sysctl_cputime(SYSCTL_HANDLER_ARGS) | |
| 137 | { | |
| 138 | int cpu, error = 0; | |
| 139 | size_t size = sizeof(struct kinfo_cputime); | |
| 140 | ||
| 141 | for (cpu = 0; cpu < ncpus; ++cpu) { | |
| 142 | if ((error = SYSCTL_OUT(req, &cputime_percpu[cpu], size))) | |
| 143 | break; | |
| 144 | } | |
| 984263bc | 145 | |
| 9eea7f0c HP |
146 | return (error); |
| 147 | } | |
| 148 | SYSCTL_PROC(_kern, OID_AUTO, cputime, (CTLTYPE_OPAQUE|CTLFLAG_RD), 0, 0, | |
| 149 | sysctl_cputime, "S,kinfo_cputime", "CPU time statistics"); | |
| 150 | #else | |
| 151 | SYSCTL_STRUCT(_kern, OID_AUTO, cputime, CTLFLAG_RD, &cpu_time, kinfo_cputime, | |
| 152 | "CPU time statistics"); | |
| 153 | #endif | |
| 984263bc | 154 | |
| 88c4d2f6 MD |
155 | /* |
| 156 | * boottime is used to calculate the 'real' uptime. Do not confuse this with | |
| 157 | * microuptime(). microtime() is not drift compensated. The real uptime | |
| 60b2809b MD |
158 | * with compensation is nanotime() - boottime. boottime is recalculated |
| 159 | * whenever the real time is set based on the compensated elapsed time | |
| 160 | * in seconds (gd->gd_time_seconds). | |
| 88c4d2f6 | 161 | * |
| 88c4d2f6 MD |
162 | * The gd_time_seconds and gd_cpuclock_base fields remain fairly monotonic. |
| 163 | * Slight adjustments to gd_cpuclock_base are made to phase-lock it to | |
| 164 | * the real time. | |
| 165 | */ | |
| 166 | struct timespec boottime; /* boot time (realtime) for reference only */ | |
| 88c4d2f6 | 167 | time_t time_second; /* read-only 'passive' uptime in seconds */ |
| 984263bc | 168 | |
| 5eb5a6bc MD |
169 | /* |
| 170 | * basetime is used to calculate the compensated real time of day. The | |
| 171 | * basetime can be modified on a per-tick basis by the adjtime(), | |
| 172 | * ntp_adjtime(), and sysctl-based time correction APIs. | |
| 173 | * | |
| 174 | * Note that frequency corrections can also be made by adjusting | |
| 175 | * gd_cpuclock_base. | |
| 176 | * | |
| 177 | * basetime is a tail-chasing FIFO, updated only by cpu #0. The FIFO is | |
| 178 | * used on both SMP and UP systems to avoid MP races between cpu's and | |
| 179 | * interrupt races on UP systems. | |
| 180 | */ | |
| 181 | #define BASETIME_ARYSIZE 16 | |
| 182 | #define BASETIME_ARYMASK (BASETIME_ARYSIZE - 1) | |
| 183 | static struct timespec basetime[BASETIME_ARYSIZE]; | |
| 184 | static volatile int basetime_index; | |
| 185 | ||
| 186 | static int | |
| 187 | sysctl_get_basetime(SYSCTL_HANDLER_ARGS) | |
| 188 | { | |
| 189 | struct timespec *bt; | |
| 190 | int error; | |
| 35238fa5 | 191 | int index; |
| 5eb5a6bc | 192 | |
| 35238fa5 MD |
193 | /* |
| 194 | * Because basetime data and index may be updated by another cpu, | |
| 195 | * a load fence is required to ensure that the data we read has | |
| 196 | * not been speculatively read relative to a possibly updated index. | |
| 197 | */ | |
| 198 | index = basetime_index; | |
| 199 | cpu_lfence(); | |
| 200 | bt = &basetime[index]; | |
| 08f95c49 | 201 | error = SYSCTL_OUT(req, bt, sizeof(*bt)); |
| 5eb5a6bc MD |
202 | return (error); |
| 203 | } | |
| 204 | ||
| 984263bc | 205 | SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, |
| 08f95c49 | 206 | &boottime, timespec, "System boottime"); |
| 5eb5a6bc | 207 | SYSCTL_PROC(_kern, OID_AUTO, basetime, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0, |
| 08f95c49 | 208 | sysctl_get_basetime, "S,timespec", "System basetime"); |
| 984263bc | 209 | |
| 88c4d2f6 MD |
210 | static void hardclock(systimer_t info, struct intrframe *frame); |
| 211 | static void statclock(systimer_t info, struct intrframe *frame); | |
| 212 | static void schedclock(systimer_t info, struct intrframe *frame); | |
| 5eb5a6bc | 213 | static void getnanotime_nbt(struct timespec *nbt, struct timespec *tsp); |
| 88c4d2f6 MD |
214 | |
| 215 | int ticks; /* system master ticks at hz */ | |
| da3639ef | 216 | int clocks_running; /* tsleep/timeout clocks operational */ |
| 88c4d2f6 MD |
217 | int64_t nsec_adj; /* ntpd per-tick adjustment in nsec << 32 */ |
| 218 | int64_t nsec_acc; /* accumulator */ | |
| 984263bc | 219 | |
| 4026c000 JS |
220 | /* NTPD time correction fields */ |
| 221 | int64_t ntp_tick_permanent; /* per-tick adjustment in nsec << 32 */ | |
| 222 | int64_t ntp_tick_acc; /* accumulator for per-tick adjustment */ | |
| 223 | int64_t ntp_delta; /* one-time correction in nsec */ | |
| 224 | int64_t ntp_big_delta = 1000000000; | |
| 225 | int32_t ntp_tick_delta; /* current adjustment rate */ | |
| 226 | int32_t ntp_default_tick_delta; /* adjustment rate for ntp_delta */ | |
| 48590578 JS |
227 | time_t ntp_leap_second; /* time of next leap second */ |
| 228 | int ntp_leap_insert; /* whether to insert or remove a second */ | |
| 4026c000 | 229 | |
| 984263bc | 230 | /* |
| 88c4d2f6 | 231 | * Finish initializing clock frequencies and start all clocks running. |
| 984263bc | 232 | */ |
| 88c4d2f6 MD |
233 | /* ARGSUSED*/ |
| 234 | static void | |
| 235 | initclocks(void *dummy) | |
| 984263bc | 236 | { |
| 88c4d2f6 MD |
237 | /*psratio = profhz / stathz;*/ |
| 238 | initclocks_pcpu(); | |
| da3639ef | 239 | clocks_running = 1; |
| 984263bc MD |
240 | } |
| 241 | ||
| 88c4d2f6 MD |
242 | /* |
| 243 | * Called on a per-cpu basis | |
| 244 | */ | |
| 245 | void | |
| 246 | initclocks_pcpu(void) | |
| 247 | { | |
| 248 | struct globaldata *gd = mycpu; | |
| 984263bc | 249 | |
| 88c4d2f6 MD |
250 | crit_enter(); |
| 251 | if (gd->gd_cpuid == 0) { | |
| 252 | gd->gd_time_seconds = 1; | |
| 044ee7c4 | 253 | gd->gd_cpuclock_base = sys_cputimer->count(); |
| 88c4d2f6 MD |
254 | } else { |
| 255 | /* XXX */ | |
| 256 | gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds; | |
| 257 | gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base; | |
| 258 | } | |
| 0d1dffdf | 259 | |
| 94ebffcd SZ |
260 | #ifdef DEVICE_POLLING |
| 261 | init_device_poll_pcpu(gd->gd_cpuid); | |
| 262 | #endif | |
| 263 | ||
| b3a7093f SZ |
264 | #ifdef IFPOLL_ENABLE |
| 265 | ifpoll_init_pcpu(gd->gd_cpuid); | |
| 266 | #endif | |
| 267 | ||
| 0d1dffdf MD |
268 | /* |
| 269 | * Use a non-queued periodic systimer to prevent multiple ticks from | |
| 270 | * building up if the sysclock jumps forward (8254 gets reset). The | |
| 271 | * sysclock will never jump backwards. Our time sync is based on | |
| 272 | * the actual sysclock, not the ticks count. | |
| 273 | */ | |
| 274 | systimer_init_periodic_nq(&gd->gd_hardclock, hardclock, NULL, hz); | |
| 275 | systimer_init_periodic_nq(&gd->gd_statclock, statclock, NULL, stathz); | |
| 88c4d2f6 | 276 | /* XXX correct the frequency for scheduler / estcpu tests */ |
| 0d1dffdf | 277 | systimer_init_periodic_nq(&gd->gd_schedclock, schedclock, |
| 8478264a | 278 | NULL, ESTCPUFREQ); |
| 88c4d2f6 MD |
279 | crit_exit(); |
| 280 | } | |
| 984263bc MD |
281 | |
| 282 | /* | |
| 88c4d2f6 MD |
283 | * This sets the current real time of day. Timespecs are in seconds and |
| 284 | * nanoseconds. We do not mess with gd_time_seconds and gd_cpuclock_base, | |
| 285 | * instead we adjust basetime so basetime + gd_* results in the current | |
| 286 | * time of day. This way the gd_* fields are guarenteed to represent | |
| 287 | * a monotonically increasing 'uptime' value. | |
| 5eb5a6bc MD |
288 | * |
| 289 | * When set_timeofday() is called from userland, the system call forces it | |
| 290 | * onto cpu #0 since only cpu #0 can update basetime_index. | |
| 984263bc | 291 | */ |
| 88c4d2f6 MD |
292 | void |
| 293 | set_timeofday(struct timespec *ts) | |
| 294 | { | |
| 5eb5a6bc MD |
295 | struct timespec *nbt; |
| 296 | int ni; | |
| 984263bc | 297 | |
| 88c4d2f6 MD |
298 | /* |
| 299 | * XXX SMP / non-atomic basetime updates | |
| 300 | */ | |
| 301 | crit_enter(); | |
| 5eb5a6bc MD |
302 | ni = (basetime_index + 1) & BASETIME_ARYMASK; |
| 303 | nbt = &basetime[ni]; | |
| 304 | nanouptime(nbt); | |
| 305 | nbt->tv_sec = ts->tv_sec - nbt->tv_sec; | |
| 306 | nbt->tv_nsec = ts->tv_nsec - nbt->tv_nsec; | |
| 307 | if (nbt->tv_nsec < 0) { | |
| 308 | nbt->tv_nsec += 1000000000; | |
| 309 | --nbt->tv_sec; | |
| 88c4d2f6 | 310 | } |
| a81931cc MD |
311 | |
| 312 | /* | |
| 313 | * Note that basetime diverges from boottime as the clock drift is | |
| 314 | * compensated for, so we cannot do away with boottime. When setting | |
| 315 | * the absolute time of day the drift is 0 (for an instant) and we | |
| 316 | * can simply assign boottime to basetime. | |
| 317 | * | |
| 318 | * Note that nanouptime() is based on gd_time_seconds which is drift | |
| 319 | * compensated up to a point (it is guarenteed to remain monotonically | |
| 320 | * increasing). gd_time_seconds is thus our best uptime guess and | |
| 321 | * suitable for use in the boottime calculation. It is already taken | |
| 322 | * into account in the basetime calculation above. | |
| 323 | */ | |
| 5eb5a6bc | 324 | boottime.tv_sec = nbt->tv_sec; |
| 4026c000 | 325 | ntp_delta = 0; |
| 5eb5a6bc MD |
326 | |
| 327 | /* | |
| 35238fa5 MD |
328 | * We now have a new basetime, make sure all other cpus have it, |
| 329 | * then update the index. | |
| 5eb5a6bc | 330 | */ |
| 35238fa5 | 331 | cpu_sfence(); |
| 5eb5a6bc MD |
332 | basetime_index = ni; |
| 333 | ||
| 88c4d2f6 MD |
334 | crit_exit(); |
| 335 | } | |
| 336 | ||
| 984263bc | 337 | /* |
| 88c4d2f6 MD |
338 | * Each cpu has its own hardclock, but we only increments ticks and softticks |
| 339 | * on cpu #0. | |
| 340 | * | |
| 341 | * NOTE! systimer! the MP lock might not be held here. We can only safely | |
| 342 | * manipulate objects owned by the current cpu. | |
| 984263bc | 343 | */ |
| 984263bc | 344 | static void |
| 88c4d2f6 | 345 | hardclock(systimer_t info, struct intrframe *frame) |
| 984263bc | 346 | { |
| 88c4d2f6 MD |
347 | sysclock_t cputicks; |
| 348 | struct proc *p; | |
| 88c4d2f6 | 349 | struct globaldata *gd = mycpu; |
| 984263bc MD |
350 | |
| 351 | /* | |
| 88c4d2f6 MD |
352 | * Realtime updates are per-cpu. Note that timer corrections as |
| 353 | * returned by microtime() and friends make an additional adjustment | |
| 354 | * using a system-wise 'basetime', but the running time is always | |
| 355 | * taken from the per-cpu globaldata area. Since the same clock | |
| 356 | * is distributing (XXX SMP) to all cpus, the per-cpu timebases | |
| 357 | * stay in synch. | |
| 358 | * | |
| 359 | * Note that we never allow info->time (aka gd->gd_hardclock.time) | |
| fad57d0e MD |
360 | * to reverse index gd_cpuclock_base, but that it is possible for |
| 361 | * it to temporarily get behind in the seconds if something in the | |
| 362 | * system locks interrupts for a long period of time. Since periodic | |
| 363 | * timers count events, though everything should resynch again | |
| 364 | * immediately. | |
| 984263bc | 365 | */ |
| 88c4d2f6 | 366 | cputicks = info->time - gd->gd_cpuclock_base; |
| 044ee7c4 | 367 | if (cputicks >= sys_cputimer->freq) { |
| 88c4d2f6 | 368 | ++gd->gd_time_seconds; |
| 044ee7c4 | 369 | gd->gd_cpuclock_base += sys_cputimer->freq; |
| 88c4d2f6 | 370 | } |
| 984263bc MD |
371 | |
| 372 | /* | |
| 92b561b7 MD |
373 | * The system-wide ticks counter and NTP related timedelta/tickdelta |
| 374 | * adjustments only occur on cpu #0. NTP adjustments are accomplished | |
| 375 | * by updating basetime. | |
| 984263bc | 376 | */ |
| 88c4d2f6 | 377 | if (gd->gd_cpuid == 0) { |
| 5eb5a6bc | 378 | struct timespec *nbt; |
| 88c4d2f6 MD |
379 | struct timespec nts; |
| 380 | int leap; | |
| 5eb5a6bc | 381 | int ni; |
| 984263bc | 382 | |
| 88c4d2f6 | 383 | ++ticks; |
| 984263bc | 384 | |
| 88c4d2f6 MD |
385 | #if 0 |
| 386 | if (tco->tc_poll_pps) | |
| 387 | tco->tc_poll_pps(tco); | |
| 388 | #endif | |
| 5eb5a6bc | 389 | |
| 88c4d2f6 | 390 | /* |
| 5eb5a6bc MD |
391 | * Calculate the new basetime index. We are in a critical section |
| 392 | * on cpu #0 and can safely play with basetime_index. Start | |
| 393 | * with the current basetime and then make adjustments. | |
| 394 | */ | |
| 395 | ni = (basetime_index + 1) & BASETIME_ARYMASK; | |
| 396 | nbt = &basetime[ni]; | |
| 397 | *nbt = basetime[basetime_index]; | |
| 398 | ||
| 399 | /* | |
| 400 | * Apply adjtime corrections. (adjtime() API) | |
| 401 | * | |
| 402 | * adjtime() only runs on cpu #0 so our critical section is | |
| 403 | * sufficient to access these variables. | |
| 88c4d2f6 | 404 | */ |
| 4026c000 | 405 | if (ntp_delta != 0) { |
| 5eb5a6bc | 406 | nbt->tv_nsec += ntp_tick_delta; |
| 4026c000 JS |
407 | ntp_delta -= ntp_tick_delta; |
| 408 | if ((ntp_delta > 0 && ntp_delta < ntp_tick_delta) || | |
| 409 | (ntp_delta < 0 && ntp_delta > ntp_tick_delta)) { | |
| 5eb5a6bc | 410 | ntp_tick_delta = ntp_delta; |
| 4026c000 JS |
411 | } |
| 412 | } | |
| 413 | ||
| 5eb5a6bc MD |
414 | /* |
| 415 | * Apply permanent frequency corrections. (sysctl API) | |
| 416 | */ | |
| 4026c000 JS |
417 | if (ntp_tick_permanent != 0) { |
| 418 | ntp_tick_acc += ntp_tick_permanent; | |
| 419 | if (ntp_tick_acc >= (1LL << 32)) { | |
| 5eb5a6bc | 420 | nbt->tv_nsec += ntp_tick_acc >> 32; |
| 331bc6f8 | 421 | ntp_tick_acc -= (ntp_tick_acc >> 32) << 32; |
| 4026c000 | 422 | } else if (ntp_tick_acc <= -(1LL << 32)) { |
| 331bc6f8 | 423 | /* Negate ntp_tick_acc to avoid shifting the sign bit. */ |
| 5eb5a6bc | 424 | nbt->tv_nsec -= (-ntp_tick_acc) >> 32; |
| 331bc6f8 | 425 | ntp_tick_acc += ((-ntp_tick_acc) >> 32) << 32; |
| 4026c000 JS |
426 | } |
| 427 | } | |
| 428 | ||
| 5eb5a6bc MD |
429 | if (nbt->tv_nsec >= 1000000000) { |
| 430 | nbt->tv_sec++; | |
| 431 | nbt->tv_nsec -= 1000000000; | |
| 432 | } else if (nbt->tv_nsec < 0) { | |
| 433 | nbt->tv_sec--; | |
| 434 | nbt->tv_nsec += 1000000000; | |
| 88c4d2f6 MD |
435 | } |
| 436 | ||
| 437 | /* | |
| 5eb5a6bc | 438 | * Another per-tick compensation. (for ntp_adjtime() API) |
| 88c4d2f6 | 439 | */ |
| 5eb5a6bc | 440 | if (nsec_adj != 0) { |
| 88c4d2f6 MD |
441 | nsec_acc += nsec_adj; |
| 442 | if (nsec_acc >= 0x100000000LL) { | |
| 5eb5a6bc | 443 | nbt->tv_nsec += nsec_acc >> 32; |
| 88c4d2f6 MD |
444 | nsec_acc = (nsec_acc & 0xFFFFFFFFLL); |
| 445 | } else if (nsec_acc <= -0x100000000LL) { | |
| 5eb5a6bc | 446 | nbt->tv_nsec -= -nsec_acc >> 32; |
| 88c4d2f6 MD |
447 | nsec_acc = -(-nsec_acc & 0xFFFFFFFFLL); |
| 448 | } | |
| 5eb5a6bc MD |
449 | if (nbt->tv_nsec >= 1000000000) { |
| 450 | nbt->tv_nsec -= 1000000000; | |
| 451 | ++nbt->tv_sec; | |
| 452 | } else if (nbt->tv_nsec < 0) { | |
| 453 | nbt->tv_nsec += 1000000000; | |
| 454 | --nbt->tv_sec; | |
| 455 | } | |
| 456 | } | |
| 457 | ||
| 458 | /************************************************************ | |
| 459 | * LEAP SECOND CORRECTION * | |
| 460 | ************************************************************ | |
| 461 | * | |
| 462 | * Taking into account all the corrections made above, figure | |
| 463 | * out the new real time. If the seconds field has changed | |
| 464 | * then apply any pending leap-second corrections. | |
| 465 | */ | |
| 466 | getnanotime_nbt(nbt, &nts); | |
| 467 | ||
| 32040d57 MD |
468 | if (time_second != nts.tv_sec) { |
| 469 | /* | |
| 470 | * Apply leap second (sysctl API). Adjust nts for changes | |
| 471 | * so we do not have to call getnanotime_nbt again. | |
| 472 | */ | |
| 473 | if (ntp_leap_second) { | |
| 474 | if (ntp_leap_second == nts.tv_sec) { | |
| 475 | if (ntp_leap_insert) { | |
| 476 | nbt->tv_sec++; | |
| 477 | nts.tv_sec++; | |
| 478 | } else { | |
| 479 | nbt->tv_sec--; | |
| 480 | nts.tv_sec--; | |
| 481 | } | |
| 5eb5a6bc | 482 | ntp_leap_second--; |
| 32040d57 | 483 | } |
| 88c4d2f6 | 484 | } |
| 88c4d2f6 | 485 | |
| 32040d57 MD |
486 | /* |
| 487 | * Apply leap second (ntp_adjtime() API), calculate a new | |
| 488 | * nsec_adj field. ntp_update_second() returns nsec_adj | |
| 489 | * as a per-second value but we need it as a per-tick value. | |
| 490 | */ | |
| 88c4d2f6 | 491 | leap = ntp_update_second(time_second, &nsec_adj); |
| 88c4d2f6 | 492 | nsec_adj /= hz; |
| 32040d57 MD |
493 | nbt->tv_sec += leap; |
| 494 | nts.tv_sec += leap; | |
| 495 | ||
| 496 | /* | |
| 497 | * Update the time_second 'approximate time' global. | |
| 498 | */ | |
| 499 | time_second = nts.tv_sec; | |
| 88c4d2f6 | 500 | } |
| 5eb5a6bc MD |
501 | |
| 502 | /* | |
| 503 | * Finally, our new basetime is ready to go live! | |
| 504 | */ | |
| 35238fa5 | 505 | cpu_sfence(); |
| 5eb5a6bc | 506 | basetime_index = ni; |
| 5ffd1608 MD |
507 | |
| 508 | /* | |
| 509 | * Figure out how badly the system is starved for memory | |
| 510 | */ | |
| 511 | vm_fault_ratecheck(); | |
| 88c4d2f6 MD |
512 | } |
| 513 | ||
| 514 | /* | |
| 92b561b7 MD |
515 | * softticks are handled for all cpus |
| 516 | */ | |
| 517 | hardclock_softtick(gd); | |
| 518 | ||
| 519 | /* | |
| c730be20 MD |
520 | * The LWKT scheduler will generally allow the current process to |
| 521 | * return to user mode even if there are other runnable LWKT threads | |
| 522 | * running in kernel mode on behalf of a user process. This will | |
| 523 | * ensure that those other threads have an opportunity to run in | |
| 524 | * fairly short order (but not instantly). | |
| 525 | */ | |
| 526 | need_lwkt_resched(); | |
| 527 | ||
| 528 | /* | |
| 84204577 | 529 | * ITimer handling is per-tick, per-cpu. I don't think ksignal() |
| 88c4d2f6 MD |
530 | * is mpsafe on curproc, so XXX get the mplock. |
| 531 | */ | |
| 532 | if ((p = curproc) != NULL && try_mplock()) { | |
| 88c4d2f6 | 533 | if (frame && CLKF_USERMODE(frame) && |
| 93328593 SS |
534 | timevalisset(&p->p_timer[ITIMER_VIRTUAL].it_value) && |
| 535 | itimerdecr(&p->p_timer[ITIMER_VIRTUAL], tick) == 0) | |
| 84204577 | 536 | ksignal(p, SIGVTALRM); |
| 93328593 SS |
537 | if (timevalisset(&p->p_timer[ITIMER_PROF].it_value) && |
| 538 | itimerdecr(&p->p_timer[ITIMER_PROF], tick) == 0) | |
| 84204577 | 539 | ksignal(p, SIGPROF); |
| 88c4d2f6 | 540 | rel_mplock(); |
| 984263bc | 541 | } |
| 604e1e09 | 542 | setdelayed(); |
| 88c4d2f6 | 543 | } |
| 984263bc | 544 | |
| 88c4d2f6 MD |
545 | /* |
| 546 | * The statistics clock typically runs at a 125Hz rate, and is intended | |
| 547 | * to be frequency offset from the hardclock (typ 100Hz). It is per-cpu. | |
| 548 | * | |
| 549 | * NOTE! systimer! the MP lock might not be held here. We can only safely | |
| 550 | * manipulate objects owned by the current cpu. | |
| 551 | * | |
| 552 | * The stats clock is responsible for grabbing a profiling sample. | |
| 553 | * Most of the statistics are only used by user-level statistics programs. | |
| 554 | * The main exceptions are p->p_uticks, p->p_sticks, p->p_iticks, and | |
| 555 | * p->p_estcpu. | |
| 556 | * | |
| 557 | * Like the other clocks, the stat clock is called from what is effectively | |
| 558 | * a fast interrupt, so the context should be the thread/process that got | |
| 559 | * interrupted. | |
| 560 | */ | |
| 561 | static void | |
| 562 | statclock(systimer_t info, struct intrframe *frame) | |
| 563 | { | |
| 564 | #ifdef GPROF | |
| 565 | struct gmonparam *g; | |
| 566 | int i; | |
| 984263bc | 567 | #endif |
| 88c4d2f6 MD |
568 | thread_t td; |
| 569 | struct proc *p; | |
| 570 | int bump; | |
| 571 | struct timeval tv; | |
| 572 | struct timeval *stv; | |
| 984263bc MD |
573 | |
| 574 | /* | |
| 88c4d2f6 | 575 | * How big was our timeslice relative to the last time? |
| 984263bc | 576 | */ |
| 88c4d2f6 MD |
577 | microuptime(&tv); /* mpsafe */ |
| 578 | stv = &mycpu->gd_stattv; | |
| 579 | if (stv->tv_sec == 0) { | |
| 580 | bump = 1; | |
| 581 | } else { | |
| 582 | bump = tv.tv_usec - stv->tv_usec + | |
| 583 | (tv.tv_sec - stv->tv_sec) * 1000000; | |
| 584 | if (bump < 0) | |
| 585 | bump = 0; | |
| 586 | if (bump > 1000000) | |
| 587 | bump = 1000000; | |
| 588 | } | |
| 589 | *stv = tv; | |
| 984263bc | 590 | |
| 88c4d2f6 MD |
591 | td = curthread; |
| 592 | p = td->td_proc; | |
| 984263bc | 593 | |
| 88c4d2f6 MD |
594 | if (frame && CLKF_USERMODE(frame)) { |
| 595 | /* | |
| 596 | * Came from userland, handle user time and deal with | |
| 597 | * possible process. | |
| 598 | */ | |
| 599 | if (p && (p->p_flag & P_PROFIL)) | |
| 600 | addupc_intr(p, CLKF_PC(frame), 1); | |
| 601 | td->td_uticks += bump; | |
| 984263bc | 602 | |
| 88c4d2f6 MD |
603 | /* |
| 604 | * Charge the time as appropriate | |
| 605 | */ | |
| 606 | if (p && p->p_nice > NZERO) | |
| 9eea7f0c | 607 | cpu_time.cp_nice += bump; |
| 88c4d2f6 | 608 | else |
| 9eea7f0c | 609 | cpu_time.cp_user += bump; |
| 88c4d2f6 MD |
610 | } else { |
| 611 | #ifdef GPROF | |
| 612 | /* | |
| 613 | * Kernel statistics are just like addupc_intr, only easier. | |
| 614 | */ | |
| 615 | g = &_gmonparam; | |
| 616 | if (g->state == GMON_PROF_ON && frame) { | |
| 617 | i = CLKF_PC(frame) - g->lowpc; | |
| 618 | if (i < g->textsize) { | |
| 619 | i /= HISTFRACTION * sizeof(*g->kcount); | |
| 620 | g->kcount[i]++; | |
| 621 | } | |
| 622 | } | |
| 623 | #endif | |
| 624 | /* | |
| 625 | * Came from kernel mode, so we were: | |
| 626 | * - handling an interrupt, | |
| 627 | * - doing syscall or trap work on behalf of the current | |
| 628 | * user process, or | |
| 629 | * - spinning in the idle loop. | |
| 630 | * Whichever it is, charge the time as appropriate. | |
| 631 | * Note that we charge interrupts to the current process, | |
| 632 | * regardless of whether they are ``for'' that process, | |
| 633 | * so that we know how much of its real time was spent | |
| 634 | * in ``non-process'' (i.e., interrupt) work. | |
| 635 | * | |
| 636 | * XXX assume system if frame is NULL. A NULL frame | |
| e43a034f | 637 | * can occur if ipi processing is done from a crit_exit(). |
| 88c4d2f6 MD |
638 | */ |
| 639 | if (frame && CLKF_INTR(frame)) | |
| 640 | td->td_iticks += bump; | |
| 641 | else | |
| 642 | td->td_sticks += bump; | |
| 643 | ||
| 644 | if (frame && CLKF_INTR(frame)) { | |
| 07522099 MD |
645 | #ifdef DEBUG_PCTRACK |
| 646 | do_pctrack(frame, PCTRACK_INT); | |
| 647 | #endif | |
| 9eea7f0c | 648 | cpu_time.cp_intr += bump; |
| 88c4d2f6 | 649 | } else { |
| 07522099 | 650 | if (td == &mycpu->gd_idlethread) { |
| 9eea7f0c | 651 | cpu_time.cp_idle += bump; |
| 07522099 MD |
652 | } else { |
| 653 | #ifdef DEBUG_PCTRACK | |
| 654 | if (frame) | |
| 655 | do_pctrack(frame, PCTRACK_SYS); | |
| 656 | #endif | |
| 9eea7f0c | 657 | cpu_time.cp_sys += bump; |
| 07522099 | 658 | } |
| 88c4d2f6 MD |
659 | } |
| 660 | } | |
| 661 | } | |
| 662 | ||
| 07522099 MD |
663 | #ifdef DEBUG_PCTRACK |
| 664 | /* | |
| 665 | * Sample the PC when in the kernel or in an interrupt. User code can | |
| 666 | * retrieve the information and generate a histogram or other output. | |
| 667 | */ | |
| 668 | ||
| 669 | static void | |
| 670 | do_pctrack(struct intrframe *frame, int which) | |
| 671 | { | |
| 672 | struct kinfo_pctrack *pctrack; | |
| 673 | ||
| 674 | pctrack = &cputime_pctrack[mycpu->gd_cpuid][which]; | |
| 675 | pctrack->pc_array[pctrack->pc_index & PCTRACK_ARYMASK] = | |
| 676 | (void *)CLKF_PC(frame); | |
| 677 | ++pctrack->pc_index; | |
| 678 | } | |
| 679 | ||
| 680 | static int | |
| 681 | sysctl_pctrack(SYSCTL_HANDLER_ARGS) | |
| 682 | { | |
| 683 | struct kinfo_pcheader head; | |
| 684 | int error; | |
| 685 | int cpu; | |
| 686 | int ntrack; | |
| 687 | ||
| 688 | head.pc_ntrack = PCTRACK_SIZE; | |
| 689 | head.pc_arysize = PCTRACK_ARYSIZE; | |
| 690 | ||
| 691 | if ((error = SYSCTL_OUT(req, &head, sizeof(head))) != 0) | |
| 692 | return (error); | |
| 693 | ||
| 694 | for (cpu = 0; cpu < ncpus; ++cpu) { | |
| 695 | for (ntrack = 0; ntrack < PCTRACK_SIZE; ++ntrack) { | |
| 696 | error = SYSCTL_OUT(req, &cputime_pctrack[cpu][ntrack], | |
| 697 | sizeof(struct kinfo_pctrack)); | |
| 698 | if (error) | |
| 699 | break; | |
| 700 | } | |
| 701 | if (error) | |
| 702 | break; | |
| 703 | } | |
| 704 | return (error); | |
| 705 | } | |
| 706 | SYSCTL_PROC(_kern, OID_AUTO, pctrack, (CTLTYPE_OPAQUE|CTLFLAG_RD), 0, 0, | |
| 707 | sysctl_pctrack, "S,kinfo_pcheader", "CPU PC tracking"); | |
| 708 | ||
| 709 | #endif | |
| 710 | ||
| 88c4d2f6 | 711 | /* |
| dcc99b62 | 712 | * The scheduler clock typically runs at a 50Hz rate. NOTE! systimer, |
| 88c4d2f6 MD |
713 | * the MP lock might not be held. We can safely manipulate parts of curproc |
| 714 | * but that's about it. | |
| dcc99b62 MD |
715 | * |
| 716 | * Each cpu has its own scheduler clock. | |
| 88c4d2f6 MD |
717 | */ |
| 718 | static void | |
| 719 | schedclock(systimer_t info, struct intrframe *frame) | |
| 720 | { | |
| 553ea3c8 | 721 | struct lwp *lp; |
| 88c4d2f6 MD |
722 | struct rusage *ru; |
| 723 | struct vmspace *vm; | |
| 724 | long rss; | |
| 725 | ||
| 553ea3c8 | 726 | if ((lp = lwkt_preempted_proc()) != NULL) { |
| dcc99b62 MD |
727 | /* |
| 728 | * Account for cpu time used and hit the scheduler. Note | |
| 729 | * that this call MUST BE MP SAFE, and the BGL IS NOT HELD | |
| 730 | * HERE. | |
| 731 | */ | |
| 553ea3c8 | 732 | ++lp->lwp_cpticks; |
| 5681a38a MD |
733 | lp->lwp_proc->p_usched->schedulerclock(lp, info->periodic, |
| 734 | info->time); | |
| dcc99b62 | 735 | } |
| 553ea3c8 | 736 | if ((lp = curthread->td_lwp) != NULL) { |
| dcc99b62 MD |
737 | /* |
| 738 | * Update resource usage integrals and maximums. | |
| 739 | */ | |
| fde7ac71 | 740 | if ((ru = &lp->lwp_proc->p_ru) && |
| 553ea3c8 | 741 | (vm = lp->lwp_proc->p_vmspace) != NULL) { |
| 88c4d2f6 MD |
742 | ru->ru_ixrss += pgtok(vm->vm_tsize); |
| 743 | ru->ru_idrss += pgtok(vm->vm_dsize); | |
| 744 | ru->ru_isrss += pgtok(vm->vm_ssize); | |
| 745 | rss = pgtok(vmspace_resident_count(vm)); | |
| 746 | if (ru->ru_maxrss < rss) | |
| 747 | ru->ru_maxrss = rss; | |
| 748 | } | |
| b68b7282 | 749 | } |
| 984263bc MD |
750 | } |
| 751 | ||
| 752 | /* | |
| a94976ad MD |
753 | * Compute number of ticks for the specified amount of time. The |
| 754 | * return value is intended to be used in a clock interrupt timed | |
| 755 | * operation and guaranteed to meet or exceed the requested time. | |
| 756 | * If the representation overflows, return INT_MAX. The minimum return | |
| 757 | * value is 1 tick and the function will round the calculation up. | |
| 758 | * If any value greater than 0 microseconds is supplied, a value | |
| 759 | * of at least 2 will be returned to ensure that a near-term clock | |
| 760 | * interrupt does not cause the timeout to occur (degenerately) early. | |
| 761 | * | |
| 762 | * Note that limit checks must take into account microseconds, which is | |
| 763 | * done simply by using the smaller signed long maximum instead of | |
| 764 | * the unsigned long maximum. | |
| 765 | * | |
| 766 | * If ints have 32 bits, then the maximum value for any timeout in | |
| 767 | * 10ms ticks is 248 days. | |
| 984263bc MD |
768 | */ |
| 769 | int | |
| a94976ad | 770 | tvtohz_high(struct timeval *tv) |
| 984263bc | 771 | { |
| a94976ad | 772 | int ticks; |
| 1fd87d54 | 773 | long sec, usec; |
| 984263bc | 774 | |
| 984263bc MD |
775 | sec = tv->tv_sec; |
| 776 | usec = tv->tv_usec; | |
| 777 | if (usec < 0) { | |
| 778 | sec--; | |
| 779 | usec += 1000000; | |
| 780 | } | |
| 781 | if (sec < 0) { | |
| 782 | #ifdef DIAGNOSTIC | |
| 783 | if (usec > 0) { | |
| 784 | sec++; | |
| 785 | usec -= 1000000; | |
| 786 | } | |
| 05c3b7cb | 787 | kprintf("tvtohz_high: negative time difference %ld sec %ld usec\n", |
| 984263bc MD |
788 | sec, usec); |
| 789 | #endif | |
| 790 | ticks = 1; | |
| a94976ad MD |
791 | } else if (sec <= INT_MAX / hz) { |
| 792 | ticks = (int)(sec * hz + | |
| 793 | ((u_long)usec + (tick - 1)) / tick) + 1; | |
| 794 | } else { | |
| 795 | ticks = INT_MAX; | |
| 796 | } | |
| 797 | return (ticks); | |
| 798 | } | |
| 799 | ||
| 800 | /* | |
| 801 | * Compute number of ticks for the specified amount of time, erring on | |
| 802 | * the side of it being too low to ensure that sleeping the returned number | |
| 803 | * of ticks will not result in a late return. | |
| 804 | * | |
| 805 | * The supplied timeval may not be negative and should be normalized. A | |
| 806 | * return value of 0 is possible if the timeval converts to less than | |
| 807 | * 1 tick. | |
| 808 | * | |
| 809 | * If ints have 32 bits, then the maximum value for any timeout in | |
| 810 | * 10ms ticks is 248 days. | |
| 811 | */ | |
| 812 | int | |
| 813 | tvtohz_low(struct timeval *tv) | |
| 814 | { | |
| 815 | int ticks; | |
| 816 | long sec; | |
| 817 | ||
| 818 | sec = tv->tv_sec; | |
| 819 | if (sec <= INT_MAX / hz) | |
| 820 | ticks = (int)(sec * hz + (u_long)tv->tv_usec / tick); | |
| 984263bc | 821 | else |
| 984263bc | 822 | ticks = INT_MAX; |
| a94976ad | 823 | return (ticks); |
| 984263bc MD |
824 | } |
| 825 | ||
| a94976ad | 826 | |
| 984263bc MD |
827 | /* |
| 828 | * Start profiling on a process. | |
| 829 | * | |
| 830 | * Kernel profiling passes proc0 which never exits and hence | |
| 831 | * keeps the profile clock running constantly. | |
| 832 | */ | |
| 833 | void | |
| 88c4d2f6 | 834 | startprofclock(struct proc *p) |
| 984263bc | 835 | { |
| 984263bc MD |
836 | if ((p->p_flag & P_PROFIL) == 0) { |
| 837 | p->p_flag |= P_PROFIL; | |
| 88c4d2f6 | 838 | #if 0 /* XXX */ |
| 984263bc | 839 | if (++profprocs == 1 && stathz != 0) { |
| e43a034f | 840 | crit_enter(); |
| 6ad39cae | 841 | psdiv = psratio; |
| 984263bc | 842 | setstatclockrate(profhz); |
| e43a034f | 843 | crit_exit(); |
| 984263bc | 844 | } |
| 88c4d2f6 | 845 | #endif |
| 984263bc MD |
846 | } |
| 847 | } | |
| 848 | ||
| 849 | /* | |
| 850 | * Stop profiling on a process. | |
| 851 | */ | |
| 852 | void | |
| 88c4d2f6 | 853 | stopprofclock(struct proc *p) |
| 984263bc | 854 | { |
| 984263bc MD |
855 | if (p->p_flag & P_PROFIL) { |
| 856 | p->p_flag &= ~P_PROFIL; | |
| 88c4d2f6 | 857 | #if 0 /* XXX */ |
| 984263bc | 858 | if (--profprocs == 0 && stathz != 0) { |
| e43a034f | 859 | crit_enter(); |
| 6ad39cae | 860 | psdiv = 1; |
| 984263bc | 861 | setstatclockrate(stathz); |
| e43a034f | 862 | crit_exit(); |
| 984263bc | 863 | } |
| 984263bc | 864 | #endif |
| 984263bc MD |
865 | } |
| 866 | } | |
| 867 | ||
| 868 | /* | |
| 869 | * Return information about system clocks. | |
| 870 | */ | |
| 871 | static int | |
| 872 | sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) | |
| 873 | { | |
| f5d21610 | 874 | struct kinfo_clockinfo clkinfo; |
| 984263bc MD |
875 | /* |
| 876 | * Construct clockinfo structure. | |
| 877 | */ | |
| f5d21610 JS |
878 | clkinfo.ci_hz = hz; |
| 879 | clkinfo.ci_tick = tick; | |
| 4026c000 | 880 | clkinfo.ci_tickadj = ntp_default_tick_delta / 1000; |
| f5d21610 JS |
881 | clkinfo.ci_profhz = profhz; |
| 882 | clkinfo.ci_stathz = stathz ? stathz : hz; | |
| 984263bc MD |
883 | return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); |
| 884 | } | |
| 885 | ||
| 886 | SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, | |
| 887 | 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); | |
| 888 | ||
| 984263bc MD |
889 | /* |
| 890 | * We have eight functions for looking at the clock, four for | |
| 891 | * microseconds and four for nanoseconds. For each there is fast | |
| 892 | * but less precise version "get{nano|micro}[up]time" which will | |
| 893 | * return a time which is up to 1/HZ previous to the call, whereas | |
| 894 | * the raw version "{nano|micro}[up]time" will return a timestamp | |
| 895 | * which is as precise as possible. The "up" variants return the | |
| 896 | * time relative to system boot, these are well suited for time | |
| 897 | * interval measurements. | |
| 88c4d2f6 MD |
898 | * |
| 899 | * Each cpu independently maintains the current time of day, so all | |
| 900 | * we need to do to protect ourselves from changes is to do a loop | |
| 901 | * check on the seconds field changing out from under us. | |
| fad57d0e MD |
902 | * |
| 903 | * The system timer maintains a 32 bit count and due to various issues | |
| 904 | * it is possible for the calculated delta to occasionally exceed | |
| 044ee7c4 MD |
905 | * sys_cputimer->freq. If this occurs the sys_cputimer->freq64_nsec |
| 906 | * multiplication can easily overflow, so we deal with the case. For | |
| 907 | * uniformity we deal with the case in the usec case too. | |
| 984263bc | 908 | */ |
| 984263bc MD |
909 | void |
| 910 | getmicrouptime(struct timeval *tvp) | |
| 911 | { | |
| 88c4d2f6 MD |
912 | struct globaldata *gd = mycpu; |
| 913 | sysclock_t delta; | |
| 914 | ||
| 915 | do { | |
| 916 | tvp->tv_sec = gd->gd_time_seconds; | |
| 917 | delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; | |
| 918 | } while (tvp->tv_sec != gd->gd_time_seconds); | |
| fad57d0e | 919 | |
| 044ee7c4 MD |
920 | if (delta >= sys_cputimer->freq) { |
| 921 | tvp->tv_sec += delta / sys_cputimer->freq; | |
| 922 | delta %= sys_cputimer->freq; | |
| fad57d0e | 923 | } |
| 044ee7c4 | 924 | tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32; |
| 88c4d2f6 MD |
925 | if (tvp->tv_usec >= 1000000) { |
| 926 | tvp->tv_usec -= 1000000; | |
| 927 | ++tvp->tv_sec; | |
| 984263bc MD |
928 | } |
| 929 | } | |
| 930 | ||
| 931 | void | |
| 932 | getnanouptime(struct timespec *tsp) | |
| 933 | { | |
| 88c4d2f6 MD |
934 | struct globaldata *gd = mycpu; |
| 935 | sysclock_t delta; | |
| 936 | ||
| 937 | do { | |
| 938 | tsp->tv_sec = gd->gd_time_seconds; | |
| 939 | delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; | |
| 940 | } while (tsp->tv_sec != gd->gd_time_seconds); | |
| fad57d0e | 941 | |
| 044ee7c4 MD |
942 | if (delta >= sys_cputimer->freq) { |
| 943 | tsp->tv_sec += delta / sys_cputimer->freq; | |
| 944 | delta %= sys_cputimer->freq; | |
| 984263bc | 945 | } |
| 044ee7c4 | 946 | tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 984263bc MD |
947 | } |
| 948 | ||
| 949 | void | |
| 88c4d2f6 | 950 | microuptime(struct timeval *tvp) |
| 984263bc | 951 | { |
| 88c4d2f6 MD |
952 | struct globaldata *gd = mycpu; |
| 953 | sysclock_t delta; | |
| 954 | ||
| 955 | do { | |
| 956 | tvp->tv_sec = gd->gd_time_seconds; | |
| 044ee7c4 | 957 | delta = sys_cputimer->count() - gd->gd_cpuclock_base; |
| 88c4d2f6 | 958 | } while (tvp->tv_sec != gd->gd_time_seconds); |
| fad57d0e | 959 | |
| 044ee7c4 MD |
960 | if (delta >= sys_cputimer->freq) { |
| 961 | tvp->tv_sec += delta / sys_cputimer->freq; | |
| 962 | delta %= sys_cputimer->freq; | |
| 984263bc | 963 | } |
| 044ee7c4 | 964 | tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32; |
| 984263bc MD |
965 | } |
| 966 | ||
| 967 | void | |
| 88c4d2f6 | 968 | nanouptime(struct timespec *tsp) |
| 984263bc | 969 | { |
| 88c4d2f6 MD |
970 | struct globaldata *gd = mycpu; |
| 971 | sysclock_t delta; | |
| 972 | ||
| 973 | do { | |
| 974 | tsp->tv_sec = gd->gd_time_seconds; | |
| 044ee7c4 | 975 | delta = sys_cputimer->count() - gd->gd_cpuclock_base; |
| 88c4d2f6 | 976 | } while (tsp->tv_sec != gd->gd_time_seconds); |
| fad57d0e | 977 | |
| 044ee7c4 MD |
978 | if (delta >= sys_cputimer->freq) { |
| 979 | tsp->tv_sec += delta / sys_cputimer->freq; | |
| 980 | delta %= sys_cputimer->freq; | |
| 984263bc | 981 | } |
| 044ee7c4 | 982 | tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 984263bc MD |
983 | } |
| 984 | ||
| 88c4d2f6 MD |
985 | /* |
| 986 | * realtime routines | |
| 987 | */ | |
| 984263bc MD |
988 | |
| 989 | void | |
| 88c4d2f6 | 990 | getmicrotime(struct timeval *tvp) |
| 984263bc | 991 | { |
| 88c4d2f6 | 992 | struct globaldata *gd = mycpu; |
| 5eb5a6bc | 993 | struct timespec *bt; |
| 88c4d2f6 | 994 | sysclock_t delta; |
| 984263bc | 995 | |
| 88c4d2f6 MD |
996 | do { |
| 997 | tvp->tv_sec = gd->gd_time_seconds; | |
| 998 | delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; | |
| 999 | } while (tvp->tv_sec != gd->gd_time_seconds); | |
| fad57d0e | 1000 | |
| 044ee7c4 MD |
1001 | if (delta >= sys_cputimer->freq) { |
| 1002 | tvp->tv_sec += delta / sys_cputimer->freq; | |
| 1003 | delta %= sys_cputimer->freq; | |
| fad57d0e | 1004 | } |
| 044ee7c4 | 1005 | tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32; |
| 984263bc | 1006 | |
| 5eb5a6bc MD |
1007 | bt = &basetime[basetime_index]; |
| 1008 | tvp->tv_sec += bt->tv_sec; | |
| 1009 | tvp->tv_usec += bt->tv_nsec / 1000; | |
| 88c4d2f6 MD |
1010 | while (tvp->tv_usec >= 1000000) { |
| 1011 | tvp->tv_usec -= 1000000; | |
| 1012 | ++tvp->tv_sec; | |
| 984263bc | 1013 | } |
| 984263bc MD |
1014 | } |
| 1015 | ||
| 1016 | void | |
| 88c4d2f6 | 1017 | getnanotime(struct timespec *tsp) |
| 984263bc | 1018 | { |
| 88c4d2f6 | 1019 | struct globaldata *gd = mycpu; |
| 5eb5a6bc | 1020 | struct timespec *bt; |
| 88c4d2f6 | 1021 | sysclock_t delta; |
| 984263bc | 1022 | |
| 88c4d2f6 MD |
1023 | do { |
| 1024 | tsp->tv_sec = gd->gd_time_seconds; | |
| 1025 | delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; | |
| 1026 | } while (tsp->tv_sec != gd->gd_time_seconds); | |
| fad57d0e | 1027 | |
| 044ee7c4 MD |
1028 | if (delta >= sys_cputimer->freq) { |
| 1029 | tsp->tv_sec += delta / sys_cputimer->freq; | |
| 1030 | delta %= sys_cputimer->freq; | |
| fad57d0e | 1031 | } |
| 044ee7c4 | 1032 | tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 984263bc | 1033 | |
| 5eb5a6bc MD |
1034 | bt = &basetime[basetime_index]; |
| 1035 | tsp->tv_sec += bt->tv_sec; | |
| 1036 | tsp->tv_nsec += bt->tv_nsec; | |
| 88c4d2f6 MD |
1037 | while (tsp->tv_nsec >= 1000000000) { |
| 1038 | tsp->tv_nsec -= 1000000000; | |
| 1039 | ++tsp->tv_sec; | |
| 984263bc | 1040 | } |
| 984263bc MD |
1041 | } |
| 1042 | ||
| 5eb5a6bc MD |
1043 | static void |
| 1044 | getnanotime_nbt(struct timespec *nbt, struct timespec *tsp) | |
| 1045 | { | |
| 1046 | struct globaldata *gd = mycpu; | |
| 1047 | sysclock_t delta; | |
| 1048 | ||
| 1049 | do { | |
| 1050 | tsp->tv_sec = gd->gd_time_seconds; | |
| 1051 | delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; | |
| 1052 | } while (tsp->tv_sec != gd->gd_time_seconds); | |
| 1053 | ||
| 044ee7c4 MD |
1054 | if (delta >= sys_cputimer->freq) { |
| 1055 | tsp->tv_sec += delta / sys_cputimer->freq; | |
| 1056 | delta %= sys_cputimer->freq; | |
| 5eb5a6bc | 1057 | } |
| 044ee7c4 | 1058 | tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 5eb5a6bc MD |
1059 | |
| 1060 | tsp->tv_sec += nbt->tv_sec; | |
| 1061 | tsp->tv_nsec += nbt->tv_nsec; | |
| 1062 | while (tsp->tv_nsec >= 1000000000) { | |
| 1063 | tsp->tv_nsec -= 1000000000; | |
| 1064 | ++tsp->tv_sec; | |
| 1065 | } | |
| 1066 | } | |
| 1067 | ||
| 1068 | ||
| 88c4d2f6 MD |
1069 | void |
| 1070 | microtime(struct timeval *tvp) | |
| 984263bc | 1071 | { |
| 88c4d2f6 | 1072 | struct globaldata *gd = mycpu; |
| 5eb5a6bc | 1073 | struct timespec *bt; |
| 88c4d2f6 | 1074 | sysclock_t delta; |
| 984263bc | 1075 | |
| 88c4d2f6 MD |
1076 | do { |
| 1077 | tvp->tv_sec = gd->gd_time_seconds; | |
| 044ee7c4 | 1078 | delta = sys_cputimer->count() - gd->gd_cpuclock_base; |
| 88c4d2f6 | 1079 | } while (tvp->tv_sec != gd->gd_time_seconds); |
| fad57d0e | 1080 | |
| 044ee7c4 MD |
1081 | if (delta >= sys_cputimer->freq) { |
| 1082 | tvp->tv_sec += delta / sys_cputimer->freq; | |
| 1083 | delta %= sys_cputimer->freq; | |
| fad57d0e | 1084 | } |
| 044ee7c4 | 1085 | tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32; |
| 984263bc | 1086 | |
| 5eb5a6bc MD |
1087 | bt = &basetime[basetime_index]; |
| 1088 | tvp->tv_sec += bt->tv_sec; | |
| 1089 | tvp->tv_usec += bt->tv_nsec / 1000; | |
| 88c4d2f6 MD |
1090 | while (tvp->tv_usec >= 1000000) { |
| 1091 | tvp->tv_usec -= 1000000; | |
| 1092 | ++tvp->tv_sec; | |
| 984263bc | 1093 | } |
| 984263bc MD |
1094 | } |
| 1095 | ||
| 88c4d2f6 MD |
1096 | void |
| 1097 | nanotime(struct timespec *tsp) | |
| 1098 | { | |
| 1099 | struct globaldata *gd = mycpu; | |
| 5eb5a6bc | 1100 | struct timespec *bt; |
| 88c4d2f6 | 1101 | sysclock_t delta; |
| 984263bc | 1102 | |
| 88c4d2f6 MD |
1103 | do { |
| 1104 | tsp->tv_sec = gd->gd_time_seconds; | |
| 044ee7c4 | 1105 | delta = sys_cputimer->count() - gd->gd_cpuclock_base; |
| 88c4d2f6 | 1106 | } while (tsp->tv_sec != gd->gd_time_seconds); |
| fad57d0e | 1107 | |
| 044ee7c4 MD |
1108 | if (delta >= sys_cputimer->freq) { |
| 1109 | tsp->tv_sec += delta / sys_cputimer->freq; | |
| 1110 | delta %= sys_cputimer->freq; | |
| fad57d0e | 1111 | } |
| 044ee7c4 | 1112 | tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 984263bc | 1113 | |
| 5eb5a6bc MD |
1114 | bt = &basetime[basetime_index]; |
| 1115 | tsp->tv_sec += bt->tv_sec; | |
| 1116 | tsp->tv_nsec += bt->tv_nsec; | |
| 88c4d2f6 MD |
1117 | while (tsp->tv_nsec >= 1000000000) { |
| 1118 | tsp->tv_nsec -= 1000000000; | |
| 1119 | ++tsp->tv_sec; | |
| 984263bc | 1120 | } |
| 984263bc MD |
1121 | } |
| 1122 | ||
| 25b804e7 MD |
1123 | /* |
| 1124 | * note: this is not exactly synchronized with real time. To do that we | |
| 1125 | * would have to do what microtime does and check for a nanoseconds overflow. | |
| 1126 | */ | |
| 1127 | time_t | |
| 1128 | get_approximate_time_t(void) | |
| 1129 | { | |
| 1130 | struct globaldata *gd = mycpu; | |
| 5eb5a6bc MD |
1131 | struct timespec *bt; |
| 1132 | ||
| 1133 | bt = &basetime[basetime_index]; | |
| 1134 | return(gd->gd_time_seconds + bt->tv_sec); | |
| 25b804e7 MD |
1135 | } |
| 1136 | ||
| 984263bc MD |
1137 | int |
| 1138 | pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) | |
| 1139 | { | |
| 1140 | pps_params_t *app; | |
| 1141 | struct pps_fetch_args *fapi; | |
| 1142 | #ifdef PPS_SYNC | |
| 1143 | struct pps_kcbind_args *kapi; | |
| 1144 | #endif | |
| 1145 | ||
| 1146 | switch (cmd) { | |
| 1147 | case PPS_IOC_CREATE: | |
| 1148 | return (0); | |
| 1149 | case PPS_IOC_DESTROY: | |
| 1150 | return (0); | |
| 1151 | case PPS_IOC_SETPARAMS: | |
| 1152 | app = (pps_params_t *)data; | |
| 1153 | if (app->mode & ~pps->ppscap) | |
| 1154 | return (EINVAL); | |
| 1155 | pps->ppsparam = *app; | |
| 1156 | return (0); | |
| 1157 | case PPS_IOC_GETPARAMS: | |
| 1158 | app = (pps_params_t *)data; | |
| 1159 | *app = pps->ppsparam; | |
| 1160 | app->api_version = PPS_API_VERS_1; | |
| 1161 | return (0); | |
| 1162 | case PPS_IOC_GETCAP: | |
| 1163 | *(int*)data = pps->ppscap; | |
| 1164 | return (0); | |
| 1165 | case PPS_IOC_FETCH: | |
| 1166 | fapi = (struct pps_fetch_args *)data; | |
| 1167 | if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) | |
| 1168 | return (EINVAL); | |
| 1169 | if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) | |
| 1170 | return (EOPNOTSUPP); | |
| 1171 | pps->ppsinfo.current_mode = pps->ppsparam.mode; | |
| 1172 | fapi->pps_info_buf = pps->ppsinfo; | |
| 1173 | return (0); | |
| 1174 | case PPS_IOC_KCBIND: | |
| 1175 | #ifdef PPS_SYNC | |
| 1176 | kapi = (struct pps_kcbind_args *)data; | |
| 1177 | /* XXX Only root should be able to do this */ | |
| 1178 | if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) | |
| 1179 | return (EINVAL); | |
| 1180 | if (kapi->kernel_consumer != PPS_KC_HARDPPS) | |
| 1181 | return (EINVAL); | |
| 1182 | if (kapi->edge & ~pps->ppscap) | |
| 1183 | return (EINVAL); | |
| 1184 | pps->kcmode = kapi->edge; | |
| 1185 | return (0); | |
| 1186 | #else | |
| 1187 | return (EOPNOTSUPP); | |
| 1188 | #endif | |
| 1189 | default: | |
| 1190 | return (ENOTTY); | |
| 1191 | } | |
| 1192 | } | |
| 1193 | ||
| 1194 | void | |
| 1195 | pps_init(struct pps_state *pps) | |
| 1196 | { | |
| 1197 | pps->ppscap |= PPS_TSFMT_TSPEC; | |
| 1198 | if (pps->ppscap & PPS_CAPTUREASSERT) | |
| 1199 | pps->ppscap |= PPS_OFFSETASSERT; | |
| 1200 | if (pps->ppscap & PPS_CAPTURECLEAR) | |
| 1201 | pps->ppscap |= PPS_OFFSETCLEAR; | |
| 1202 | } | |
| 1203 | ||
| 1204 | void | |
| 88c4d2f6 | 1205 | pps_event(struct pps_state *pps, sysclock_t count, int event) |
| 984263bc | 1206 | { |
| 88c4d2f6 MD |
1207 | struct globaldata *gd; |
| 1208 | struct timespec *tsp; | |
| 1209 | struct timespec *osp; | |
| 5eb5a6bc | 1210 | struct timespec *bt; |
| 88c4d2f6 MD |
1211 | struct timespec ts; |
| 1212 | sysclock_t *pcount; | |
| 1213 | #ifdef PPS_SYNC | |
| 1214 | sysclock_t tcount; | |
| 1215 | #endif | |
| 1216 | sysclock_t delta; | |
| 1217 | pps_seq_t *pseq; | |
| 1218 | int foff; | |
| 1219 | int fhard; | |
| 1220 | ||
| 1221 | gd = mycpu; | |
| 984263bc MD |
1222 | |
| 1223 | /* Things would be easier with arrays... */ | |
| 1224 | if (event == PPS_CAPTUREASSERT) { | |
| 1225 | tsp = &pps->ppsinfo.assert_timestamp; | |
| 1226 | osp = &pps->ppsparam.assert_offset; | |
| 1227 | foff = pps->ppsparam.mode & PPS_OFFSETASSERT; | |
| 1228 | fhard = pps->kcmode & PPS_CAPTUREASSERT; | |
| 1229 | pcount = &pps->ppscount[0]; | |
| 1230 | pseq = &pps->ppsinfo.assert_sequence; | |
| 1231 | } else { | |
| 1232 | tsp = &pps->ppsinfo.clear_timestamp; | |
| 1233 | osp = &pps->ppsparam.clear_offset; | |
| 1234 | foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; | |
| 1235 | fhard = pps->kcmode & PPS_CAPTURECLEAR; | |
| 1236 | pcount = &pps->ppscount[1]; | |
| 1237 | pseq = &pps->ppsinfo.clear_sequence; | |
| 1238 | } | |
| 1239 | ||
| 984263bc MD |
1240 | /* Nothing really happened */ |
| 1241 | if (*pcount == count) | |
| 1242 | return; | |
| 1243 | ||
| 1244 | *pcount = count; | |
| 1245 | ||
| 88c4d2f6 MD |
1246 | do { |
| 1247 | ts.tv_sec = gd->gd_time_seconds; | |
| 1248 | delta = count - gd->gd_cpuclock_base; | |
| 1249 | } while (ts.tv_sec != gd->gd_time_seconds); | |
| fad57d0e | 1250 | |
| 044ee7c4 MD |
1251 | if (delta >= sys_cputimer->freq) { |
| 1252 | ts.tv_sec += delta / sys_cputimer->freq; | |
| 1253 | delta %= sys_cputimer->freq; | |
| 88c4d2f6 | 1254 | } |
| 044ee7c4 | 1255 | ts.tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32; |
| 5eb5a6bc MD |
1256 | bt = &basetime[basetime_index]; |
| 1257 | ts.tv_sec += bt->tv_sec; | |
| 1258 | ts.tv_nsec += bt->tv_nsec; | |
| 88c4d2f6 MD |
1259 | while (ts.tv_nsec >= 1000000000) { |
| 1260 | ts.tv_nsec -= 1000000000; | |
| 1261 | ++ts.tv_sec; | |
| 984263bc | 1262 | } |
| 984263bc MD |
1263 | |
| 1264 | (*pseq)++; | |
| 1265 | *tsp = ts; | |
| 1266 | ||
| 1267 | if (foff) { | |
| 1268 | timespecadd(tsp, osp); | |
| 1269 | if (tsp->tv_nsec < 0) { | |
| 1270 | tsp->tv_nsec += 1000000000; | |
| 1271 | tsp->tv_sec -= 1; | |
| 1272 | } | |
| 1273 | } | |
| 1274 | #ifdef PPS_SYNC | |
| 1275 | if (fhard) { | |
| 1276 | /* magic, at its best... */ | |
| 1277 | tcount = count - pps->ppscount[2]; | |
| 1278 | pps->ppscount[2] = count; | |
| 044ee7c4 MD |
1279 | if (tcount >= sys_cputimer->freq) { |
| 1280 | delta = (1000000000 * (tcount / sys_cputimer->freq) + | |
| 1281 | sys_cputimer->freq64_nsec * | |
| 1282 | (tcount % sys_cputimer->freq)) >> 32; | |
| fad57d0e | 1283 | } else { |
| 044ee7c4 | 1284 | delta = (sys_cputimer->freq64_nsec * tcount) >> 32; |
| fad57d0e | 1285 | } |
| 984263bc MD |
1286 | hardpps(tsp, delta); |
| 1287 | } | |
| 1288 | #endif | |
| 1289 | } | |
| 88c4d2f6 | 1290 |