kernel - extend cpus past 64 - fixes and adjustments
[dragonfly.git] / sys / platform / pc64 / isa / clock.c
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by the University of
20  *      California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from: @(#)clock.c       7.2 (Berkeley) 5/12/91
38  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
39  */
40
41 /*
42  * Routines to handle clock hardware.
43  */
44
45 /*
46  * inittodr, settodr and support routines written
47  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
48  *
49  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
50  */
51
52 #if 0
53 #include "opt_clock.h"
54 #endif
55
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/eventhandler.h>
59 #include <sys/time.h>
60 #include <sys/kernel.h>
61 #include <sys/bus.h>
62 #include <sys/sysctl.h>
63 #include <sys/cons.h>
64 #include <sys/systimer.h>
65 #include <sys/globaldata.h>
66 #include <sys/thread2.h>
67 #include <sys/machintr.h>
68 #include <sys/interrupt.h>
69
70 #include <machine/clock.h>
71 #include <machine/cputypes.h>
72 #include <machine/frame.h>
73 #include <machine/ipl.h>
74 #include <machine/limits.h>
75 #include <machine/md_var.h>
76 #include <machine/psl.h>
77 #include <machine/segments.h>
78 #include <machine/smp.h>
79 #include <machine/specialreg.h>
80 #include <machine/intr_machdep.h>
81
82 #include <machine_base/apic/ioapic.h>
83 #include <machine_base/apic/ioapic_abi.h>
84 #include <machine_base/icu/icu.h>
85 #include <bus/isa/isa.h>
86 #include <bus/isa/rtc.h>
87 #include <machine_base/isa/timerreg.h>
88
89 static void i8254_restore(void);
90 static void resettodr_on_shutdown(void *arg __unused);
91
92 /*
93  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
94  * can use a simple formula for leap years.
95  */
96 #define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
97 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
98
99 #ifndef TIMER_FREQ
100 #define TIMER_FREQ   1193182
101 #endif
102
103 static uint8_t i8254_walltimer_sel;
104 static uint16_t i8254_walltimer_cntr;
105
106 int     adjkerntz;              /* local offset from GMT in seconds */
107 int     disable_rtc_set;        /* disable resettodr() if != 0 */
108 int     tsc_present;
109 int     tsc_invariant;
110 int     tsc_mpsync;
111 int64_t tsc_frequency;
112 int     tsc_is_broken;
113 int     wall_cmos_clock;        /* wall CMOS clock assumed if != 0 */
114 int     timer0_running;
115 enum tstate { RELEASED, ACQUIRED };
116 enum tstate timer0_state;
117 enum tstate timer1_state;
118 enum tstate timer2_state;
119
120 static  int     beeping = 0;
121 static  const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
122 static  u_char  rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
123 static  u_char  rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
124 static  int     rtc_loaded;
125
126 static int i8254_cputimer_div;
127
128 static int i8254_nointr;
129 static int i8254_intr_disable = 1;
130 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
131
132 static struct callout sysbeepstop_ch;
133
134 static sysclock_t i8254_cputimer_count(void);
135 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
136 static void i8254_cputimer_destruct(struct cputimer *cputimer);
137
138 static struct cputimer  i8254_cputimer = {
139     SLIST_ENTRY_INITIALIZER,
140     "i8254",
141     CPUTIMER_PRI_8254,
142     0,
143     i8254_cputimer_count,
144     cputimer_default_fromhz,
145     cputimer_default_fromus,
146     i8254_cputimer_construct,
147     i8254_cputimer_destruct,
148     TIMER_FREQ,
149     0, 0, 0
150 };
151
152 static sysclock_t tsc_cputimer_count(void);
153 static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
154
155 static struct cputimer  tsc_cputimer = {
156     SLIST_ENTRY_INITIALIZER,
157     "TSC",
158     CPUTIMER_PRI_TSC,
159     CPUTIMER_TSC,
160     tsc_cputimer_count,
161     cputimer_default_fromhz,
162     cputimer_default_fromus,
163     tsc_cputimer_construct,
164     cputimer_default_destruct,
165     0,
166     0, 0, 0
167 };
168
169 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
170 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
171 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
172
173 static struct cputimer_intr i8254_cputimer_intr = {
174     .freq = TIMER_FREQ,
175     .reload = i8254_intr_reload,
176     .enable = cputimer_intr_default_enable,
177     .config = i8254_intr_config,
178     .restart = cputimer_intr_default_restart,
179     .pmfixup = cputimer_intr_default_pmfixup,
180     .initclock = i8254_intr_initclock,
181     .next = SLIST_ENTRY_INITIALIZER,
182     .name = "i8254",
183     .type = CPUTIMER_INTR_8254,
184     .prio = CPUTIMER_INTR_PRIO_8254,
185     .caps = CPUTIMER_INTR_CAP_PS
186 };
187
188 /*
189  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
190  * counting as of this interrupt.  We use timer1 in free-running mode (not
191  * generating any interrupts) as our main counter.  Each cpu has timeouts
192  * pending.
193  *
194  * This code is INTR_MPSAFE and may be called without the BGL held.
195  */
196 static void
197 clkintr(void *dummy, void *frame_arg)
198 {
199         static sysclock_t sysclock_count;       /* NOTE! Must be static */
200         struct globaldata *gd = mycpu;
201         struct globaldata *gscan;
202         int n;
203
204         /*
205          * SWSTROBE mode is a one-shot, the timer is no longer running
206          */
207         timer0_running = 0;
208
209         /*
210          * XXX the dispatcher needs work.  right now we call systimer_intr()
211          * directly or via IPI for any cpu with systimers queued, which is
212          * usually *ALL* of them.  We need to use the LAPIC timer for this.
213          */
214         sysclock_count = sys_cputimer->count();
215         for (n = 0; n < ncpus; ++n) {
216             gscan = globaldata_find(n);
217             if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
218                 continue;
219             if (gscan != gd) {
220                 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, 
221                                 &sysclock_count, 1);
222             } else {
223                 systimer_intr(&sysclock_count, 0, frame_arg);
224             }
225         }
226 }
227
228
229 /*
230  * NOTE! not MP safe.
231  */
232 int
233 acquire_timer2(int mode)
234 {
235         if (timer2_state != RELEASED)
236                 return (-1);
237         timer2_state = ACQUIRED;
238
239         /*
240          * This access to the timer registers is as atomic as possible
241          * because it is a single instruction.  We could do better if we
242          * knew the rate.
243          */
244         outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
245         return (0);
246 }
247
248 int
249 release_timer2(void)
250 {
251         if (timer2_state != ACQUIRED)
252                 return (-1);
253         outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
254         timer2_state = RELEASED;
255         return (0);
256 }
257
258 #include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * Debugger command: print the raw (undecoded) RTC date and time registers
 * in hex, followed by status registers A and B and the register read via
 * RTC_INTR (labelled "C").
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	    rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	    rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	    rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */
270
271 /*
272  * Return the current cpu timer count as a 32 bit integer.
273  */
274 static
275 sysclock_t
276 i8254_cputimer_count(void)
277 {
278         static __uint16_t cputimer_last;
279         __uint16_t count;
280         sysclock_t ret;
281
282         clock_lock();
283         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
284         count = (__uint8_t)inb(i8254_walltimer_cntr);           /* get countdown */
285         count |= ((__uint8_t)inb(i8254_walltimer_cntr) << 8);
286         count = -count;                                 /* -> countup */
287         if (count < cputimer_last)                      /* rollover */
288                 i8254_cputimer.base += 0x00010000;
289         ret = i8254_cputimer.base | count;
290         cputimer_last = count;
291         clock_unlock();
292         return(ret);
293 }
294
295 /*
296  * This function is called whenever the system timebase changes, allowing
297  * us to calculate what is needed to convert a system timebase tick 
298  * into an 8254 tick for the interrupt timer.  If we can convert to a
299  * simple shift, multiplication, or division, we do so.  Otherwise 64
300  * bit arithmatic is required every time the interrupt timer is reloaded.
301  */
302 static void
303 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
304 {
305     int freq;
306     int div;
307
308     /*
309      * Will a simple divide do the trick?
310      */
311     div = (timer->freq + (cti->freq / 2)) / cti->freq;
312     freq = cti->freq * div;
313
314     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
315         i8254_cputimer_div = div;
316     else
317         i8254_cputimer_div = 0;
318 }
319
320 /*
321  * Reload for the next timeout.  It is possible for the reload value
322  * to be 0 or negative, indicating that an immediate timer interrupt
323  * is desired.  For now make the minimum 2 ticks.
324  *
325  * We may have to convert from the system timebase to the 8254 timebase.
326  */
327 static void
328 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
329 {
330     __uint16_t count;
331
332     if (i8254_cputimer_div)
333         reload /= i8254_cputimer_div;
334     else
335         reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
336
337     if ((int)reload < 2)
338         reload = 2;
339
340     clock_lock();
341     if (timer0_running) {
342         outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);     /* count-down timer */
343         count = (__uint8_t)inb(TIMER_CNTR0);            /* lsb */
344         count |= ((__uint8_t)inb(TIMER_CNTR0) << 8);    /* msb */
345         if (reload < count) {
346             outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
347             outb(TIMER_CNTR0, (__uint8_t)reload);       /* lsb */
348             outb(TIMER_CNTR0, (__uint8_t)(reload >> 8)); /* msb */
349         }
350     } else {
351         timer0_running = 1;
352         if (reload > 0xFFFF)
353             reload = 0;         /* full count */
354         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
355         outb(TIMER_CNTR0, (__uint8_t)reload);           /* lsb */
356         outb(TIMER_CNTR0, (__uint8_t)(reload >> 8));    /* msb */
357     }
358     clock_unlock();
359 }
360
361 /*
362  * DELAY(usec)       - Spin for the specified number of microseconds.
363  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
364  *                     but do a thread switch in the loop
365  *
366  * Relies on timer 1 counting down from (cputimer_freq / hz)
367  * Note: timer had better have been programmed before this is first used!
368  */
369 static void
370 DODELAY(int n, int doswitch)
371 {
372         ssysclock_t delta, ticks_left;
373         sysclock_t prev_tick, tick;
374
375 #ifdef DELAYDEBUG
376         int getit_calls = 1;
377         int n1;
378         static int state = 0;
379
380         if (state == 0) {
381                 state = 1;
382                 for (n1 = 1; n1 <= 10000000; n1 *= 10)
383                         DELAY(n1);
384                 state = 2;
385         }
386         if (state == 1)
387                 kprintf("DELAY(%d)...", n);
388 #endif
389         /*
390          * Guard against the timer being uninitialized if we are called
391          * early for console i/o.
392          */
393         if (timer0_state == RELEASED)
394                 i8254_restore();
395
396         /*
397          * Read the counter first, so that the rest of the setup overhead is
398          * counted.  Then calculate the number of hardware timer ticks
399          * required, rounding up to be sure we delay at least the requested
400          * number of microseconds.
401          */
402         prev_tick = sys_cputimer->count();
403         ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
404                      1000000;
405
406         /*
407          * Loop until done.
408          */
409         while (ticks_left > 0) {
410                 tick = sys_cputimer->count();
411 #ifdef DELAYDEBUG
412                 ++getit_calls;
413 #endif
414                 delta = tick - prev_tick;
415                 prev_tick = tick;
416                 if (delta < 0)
417                         delta = 0;
418                 ticks_left -= delta;
419                 if (doswitch && ticks_left > 0)
420                         lwkt_switch();
421                 cpu_pause();
422         }
423 #ifdef DELAYDEBUG
424         if (state == 1)
425                 kprintf(" %d calls to getit() at %d usec each\n",
426                        getit_calls, (n + 5) / getit_calls);
427 #endif
428 }
429
/*
 * Spin for n microseconds.  DELAY() never switches threads.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);		/* doswitch == 0: pure busy-wait */
}
438
439 /*
440  * Returns non-zero if the specified time period has elapsed.  Call
441  * first with last_clock set to 0.
442  */
443 int
444 CHECKTIMEOUT(TOTALDELAY *tdd)
445 {
446         sysclock_t delta;
447         int us;
448
449         if (tdd->started == 0) {
450                 if (timer0_state == RELEASED)
451                         i8254_restore();
452                 tdd->last_clock = sys_cputimer->count();
453                 tdd->started = 1;
454                 return(0);
455         }
456         delta = sys_cputimer->count() - tdd->last_clock;
457         us = (u_int64_t)delta * (u_int64_t)1000000 /
458              (u_int64_t)sys_cputimer->freq;
459         tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
460                            1000000;
461         tdd->us -= us;
462         return (tdd->us < 0);
463 }
464
465
466 /*
467  * DRIVERSLEEP() does not switch if called with a spinlock held or
468  * from a hard interrupt.
469  */
470 void
471 DRIVERSLEEP(int usec)
472 {
473         globaldata_t gd = mycpu;
474
475         if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
476                 DODELAY(usec, 0);
477         } else {
478                 DODELAY(usec, 1);
479         }
480 }
481
482 static void
483 sysbeepstop(void *chan)
484 {
485         outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */
486         beeping = 0;
487         release_timer2();
488 }
489
490 int
491 sysbeep(int pitch, int period)
492 {
493         if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
494                 return(-1);
495         if (sysbeep_enable == 0)
496                 return(-1);
497         /*
498          * Nobody else is using timer2, we do not need the clock lock
499          */
500         outb(TIMER_CNTR2, pitch);
501         outb(TIMER_CNTR2, (pitch>>8));
502         if (!beeping) {
503                 /* enable counter2 output to speaker */
504                 outb(IO_PPI, inb(IO_PPI) | 3);
505                 beeping = period;
506                 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
507         }
508         return (0);
509 }
510
511 /*
512  * RTC support routines
513  */
514
515 int
516 rtcin(int reg)
517 {
518         u_char val;
519
520         crit_enter();
521         outb(IO_RTC, reg);
522         inb(0x84);
523         val = inb(IO_RTC + 1);
524         inb(0x84);
525         crit_exit();
526         return (val);
527 }
528
529 static __inline void
530 writertc(u_char reg, u_char val)
531 {
532         crit_enter();
533         inb(0x84);
534         outb(IO_RTC, reg);
535         inb(0x84);
536         outb(IO_RTC + 1, val);
537         inb(0x84);              /* XXX work around wrong order in rtcin() */
538         crit_exit();
539 }
540
/*
 * Read an RTC register via rtcin() and convert the BCD value to binary.
 */
static __inline int
readrtc(int port)
{
	return (bcd2bin(rtcin(port)));
}
546
547 static u_int
548 calibrate_clocks(void)
549 {
550         u_int64_t old_tsc;
551         u_int tot_count;
552         sysclock_t count, prev_count;
553         int sec, start_sec, timeout;
554
555         if (bootverbose)
556                 kprintf("Calibrating clock(s) ...\n");
557         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
558                 goto fail;
559         timeout = 100000000;
560
561         /* Read the mc146818A seconds counter. */
562         for (;;) {
563                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
564                         sec = rtcin(RTC_SEC);
565                         break;
566                 }
567                 if (--timeout == 0)
568                         goto fail;
569         }
570
571         /* Wait for the mC146818A seconds counter to change. */
572         start_sec = sec;
573         for (;;) {
574                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
575                         sec = rtcin(RTC_SEC);
576                         if (sec != start_sec)
577                                 break;
578                 }
579                 if (--timeout == 0)
580                         goto fail;
581         }
582
583         /* Start keeping track of the i8254 counter. */
584         prev_count = sys_cputimer->count();
585         tot_count = 0;
586
587         if (tsc_present) 
588                 old_tsc = rdtsc();
589         else
590                 old_tsc = 0;            /* shut up gcc */
591
592         /*
593          * Wait for the mc146818A seconds counter to change.  Read the i8254
594          * counter for each iteration since this is convenient and only
595          * costs a few usec of inaccuracy. The timing of the final reads
596          * of the counters almost matches the timing of the initial reads,
597          * so the main cause of inaccuracy is the varying latency from 
598          * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
599          * rtcin(RTC_SEC) that returns a changed seconds count.  The
600          * maximum inaccuracy from this cause is < 10 usec on 486's.
601          */
602         start_sec = sec;
603         for (;;) {
604                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
605                         sec = rtcin(RTC_SEC);
606                 count = sys_cputimer->count();
607                 tot_count += (int)(count - prev_count);
608                 prev_count = count;
609                 if (sec != start_sec)
610                         break;
611                 if (--timeout == 0)
612                         goto fail;
613         }
614
615         /*
616          * Read the cpu cycle counter.  The timing considerations are
617          * similar to those for the i8254 clock.
618          */
619         if (tsc_present) {
620                 tsc_frequency = rdtsc() - old_tsc;
621         }
622
623         if (tsc_present) {
624                 kprintf("TSC%s clock: %llu Hz, ",
625                     tsc_invariant ? " invariant" : "",
626                     (long long)tsc_frequency);
627         }
628         kprintf("i8254 clock: %u Hz\n", tot_count);
629         return (tot_count);
630
631 fail:
632         kprintf("failed, using default i8254 clock of %u Hz\n",
633                 i8254_cputimer.freq);
634         return (i8254_cputimer.freq);
635 }
636
637 static void
638 i8254_restore(void)
639 {
640         timer0_state = ACQUIRED;
641
642         clock_lock();
643
644         /*
645          * Timer0 is our fine-grained variable clock interrupt
646          */
647         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
648         outb(TIMER_CNTR0, 2);   /* lsb */
649         outb(TIMER_CNTR0, 0);   /* msb */
650         clock_unlock();
651
652         if (!i8254_nointr) {
653                 cputimer_intr_register(&i8254_cputimer_intr);
654                 cputimer_intr_select(&i8254_cputimer_intr, 0);
655         }
656
657         /*
658          * Timer1 or timer2 is our free-running clock, but only if another
659          * has not been selected.
660          */
661         cputimer_register(&i8254_cputimer);
662         cputimer_select(&i8254_cputimer, 0);
663 }
664
665 static void
666 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
667 {
668         int which;
669
670         /*
671          * Should we use timer 1 or timer 2 ?
672          */
673         which = 0;
674         TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
675         if (which != 1 && which != 2)
676                 which = 2;
677
678         switch(which) {
679         case 1:
680                 timer->name = "i8254_timer1";
681                 timer->type = CPUTIMER_8254_SEL1;
682                 i8254_walltimer_sel = TIMER_SEL1;
683                 i8254_walltimer_cntr = TIMER_CNTR1;
684                 timer1_state = ACQUIRED;
685                 break;
686         case 2:
687                 timer->name = "i8254_timer2";
688                 timer->type = CPUTIMER_8254_SEL2;
689                 i8254_walltimer_sel = TIMER_SEL2;
690                 i8254_walltimer_cntr = TIMER_CNTR2;
691                 timer2_state = ACQUIRED;
692                 break;
693         }
694
695         timer->base = (oldclock + 0xFFFF) & ~0xFFFF;
696
697         clock_lock();
698         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
699         outb(i8254_walltimer_cntr, 0);  /* lsb */
700         outb(i8254_walltimer_cntr, 0);  /* msb */
701         outb(IO_PPI, inb(IO_PPI) | 1);  /* bit 0: enable gate, bit 1: spkr */
702         clock_unlock();
703 }
704
705 static void
706 i8254_cputimer_destruct(struct cputimer *timer)
707 {
708         switch(timer->type) {
709         case CPUTIMER_8254_SEL1:
710             timer1_state = RELEASED;
711             break;
712         case CPUTIMER_8254_SEL2:
713             timer2_state = RELEASED;
714             break;
715         default:
716             break;
717         }
718         timer->type = 0;
719 }
720
721 static void
722 rtc_restore(void)
723 {
724         /* Restore all of the RTC's "status" (actually, control) registers. */
725         writertc(RTC_STATUSB, RTCSB_24HR);
726         writertc(RTC_STATUSA, rtc_statusa);
727         writertc(RTC_STATUSB, rtc_statusb);
728 }
729
/*
 * Restore all the timers.
 *
 * Called to resynchronize our core timekeeping after a long halt, e.g.
 * from apm_default_resume() and friends.  Also called if, after a BIOS
 * call, we have detected munging of the 8254.  This is needed because
 * the delta of the cputimer_count() counter may have grown too large for
 * nanouptime() and friends to handle, or (in the case of 8254 munging)
 * might cause the SYSTIMER code to trigger prematurely.
 *
 * Both restores run inside a single critical section.
 */
void
timer_restore(void)
{
	crit_enter();
	i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();		/* reenable RTC interrupts */
	crit_exit();
}
748
749 /*
750  * Initialize 8254 timer 0 early so that it can be used in DELAY().
751  */
752 void
753 startrtclock(void)
754 {
755         u_int delta, freq;
756
757         /* 
758          * Can we use the TSC?
759          *
760          * NOTE: If running under qemu, probably a good idea to force the
761          *       TSC because we are not likely to detect it as being
762          *       invariant or mpsyncd if you don't.  This will greatly
763          *       reduce SMP contention.
764          */
765         if (cpu_feature & CPUID_TSC) {
766                 tsc_present = 1;
767                 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
768
769                 if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
770                      cpu_vendor_id == CPU_VENDOR_AMD) &&
771                     cpu_exthigh >= 0x80000007) {
772                         u_int regs[4];
773
774                         do_cpuid(0x80000007, regs);
775                         if (regs[3] & 0x100)
776                                 tsc_invariant = 1;
777                 }
778         } else {
779                 tsc_present = 0;
780         }
781
782         /*
783          * Initial RTC state, don't do anything unexpected
784          */
785         writertc(RTC_STATUSA, rtc_statusa);
786         writertc(RTC_STATUSB, RTCSB_24HR);
787
788         /*
789          * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 
790          * generate an interrupt, which we will ignore for now.
791          *
792          * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
793          * (so it counts a full 2^16 and repeats).  We will use this timer
794          * for our counting.
795          */
796         i8254_restore();
797         freq = calibrate_clocks();
798 #ifdef CLK_CALIBRATION_LOOP
799         if (bootverbose) {
800                 kprintf(
801                 "Press a key on the console to abort clock calibration\n");
802                 while (cncheckc() == -1)
803                         calibrate_clocks();
804         }
805 #endif
806
807         /*
808          * Use the calibrated i8254 frequency if it seems reasonable.
809          * Otherwise use the default, and don't use the calibrated i586
810          * frequency.
811          */
812         delta = freq > i8254_cputimer.freq ? 
813                         freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
814         if (delta < i8254_cputimer.freq / 100) {
815 #ifndef CLK_USE_I8254_CALIBRATION
816                 if (bootverbose)
817                         kprintf(
818 "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n");
819                 freq = i8254_cputimer.freq;
820 #endif
821                 /*
822                  * NOTE:
823                  * Interrupt timer's freq must be adjusted
824                  * before we change the cuptimer's frequency.
825                  */
826                 i8254_cputimer_intr.freq = freq;
827                 cputimer_set_frequency(&i8254_cputimer, freq);
828         } else {
829                 if (bootverbose)
830                         kprintf(
831                     "%d Hz differs from default of %d Hz by more than 1%%\n",
832                                freq, i8254_cputimer.freq);
833                 tsc_frequency = 0;
834         }
835
836 #ifndef CLK_USE_TSC_CALIBRATION
837         if (tsc_frequency != 0) {
838                 if (bootverbose)
839                         kprintf(
840 "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n");
841                 tsc_frequency = 0;
842         }
843 #endif
844         if (tsc_present && tsc_frequency == 0) {
845                 /*
846                  * Calibration of the i586 clock relative to the mc146818A
847                  * clock failed.  Do a less accurate calibration relative
848                  * to the i8254 clock.
849                  */
850                 u_int64_t old_tsc = rdtsc();
851
852                 DELAY(1000000);
853                 tsc_frequency = rdtsc() - old_tsc;
854 #ifdef CLK_USE_TSC_CALIBRATION
855                 if (bootverbose) {
856                         kprintf("TSC clock: %llu Hz (Method B)\n",
857                                 tsc_frequency);
858                 }
859 #endif
860         }
861
862         EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST);
863 }
864
865 /*
866  * Sync the time of day back to the RTC on shutdown, but only if
867  * we have already loaded it and have not crashed.
868  */
869 static void
870 resettodr_on_shutdown(void *arg __unused)
871 {
872         if (rtc_loaded && panicstr == NULL) {
873                 resettodr();
874         }
875 }
876
877 /*
878  * Initialize the time of day register, based on the time base which is, e.g.
879  * from a filesystem.
880  */
881 void
882 inittodr(time_t base)
883 {
884         unsigned long   sec, days;
885         int             year, month;
886         int             y, m;
887         struct timespec ts;
888
889         if (base) {
890                 ts.tv_sec = base;
891                 ts.tv_nsec = 0;
892                 set_timeofday(&ts);
893         }
894
895         /* Look if we have a RTC present and the time is valid */
896         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
897                 goto wrong_time;
898
899         /* wait for time update to complete */
900         /* If RTCSA_TUP is zero, we have at least 244us before next update */
901         crit_enter();
902         while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
903                 crit_exit();
904                 crit_enter();
905         }
906
907         days = 0;
908 #ifdef USE_RTC_CENTURY
909         year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
910 #else
911         year = readrtc(RTC_YEAR) + 1900;
912         if (year < 1970)
913                 year += 100;
914 #endif
915         if (year < 1970) {
916                 crit_exit();
917                 goto wrong_time;
918         }
919         month = readrtc(RTC_MONTH);
920         for (m = 1; m < month; m++)
921                 days += daysinmonth[m-1];
922         if ((month > 2) && LEAPYEAR(year))
923                 days ++;
924         days += readrtc(RTC_DAY) - 1;
925         for (y = 1970; y < year; y++)
926                 days += DAYSPERYEAR + LEAPYEAR(y);
927         sec = ((( days * 24 +
928                   readrtc(RTC_HRS)) * 60 +
929                   readrtc(RTC_MIN)) * 60 +
930                   readrtc(RTC_SEC));
931         /* sec now contains the number of seconds, since Jan 1 1970,
932            in the local time zone */
933
934         sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
935
936         y = (int)(time_second - sec);
937         if (y <= -2 || y >= 2) {
938                 /* badly off, adjust it */
939                 ts.tv_sec = sec;
940                 ts.tv_nsec = 0;
941                 set_timeofday(&ts);
942         }
943         rtc_loaded = 1;
944         crit_exit();
945         return;
946
947 wrong_time:
948         kprintf("Invalid time in real time clock.\n");
949         kprintf("Check and reset the date immediately!\n");
950 }
951
952 /*
953  * Write system time back to RTC
954  */
955 void
956 resettodr(void)
957 {
958         struct timeval tv;
959         unsigned long tm;
960         int m;
961         int y;
962
963         if (disable_rtc_set)
964                 return;
965
966         microtime(&tv);
967         tm = tv.tv_sec;
968
969         crit_enter();
970         /* Disable RTC updates and interrupts. */
971         writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
972
973         /* Calculate local time to put in RTC */
974
975         tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
976
977         writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;    /* Write back Seconds */
978         writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;    /* Write back Minutes */
979         writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;    /* Write back Hours   */
980
981         /* We have now the days since 01-01-1970 in tm */
982         writertc(RTC_WDAY, (tm+4)%7);                   /* Write back Weekday */
983         for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
984              tm >= m;
985              y++,      m = DAYSPERYEAR + LEAPYEAR(y))
986              tm -= m;
987
988         /* Now we have the years in y and the day-of-the-year in tm */
989         writertc(RTC_YEAR, bin2bcd(y%100));             /* Write back Year    */
990 #ifdef USE_RTC_CENTURY
991         writertc(RTC_CENTURY, bin2bcd(y/100));          /* ... and Century    */
992 #endif
993         for (m = 0; ; m++) {
994                 int ml;
995
996                 ml = daysinmonth[m];
997                 if (m == 1 && LEAPYEAR(y))
998                         ml++;
999                 if (tm < ml)
1000                         break;
1001                 tm -= ml;
1002         }
1003
1004         writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1005         writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1006
1007         /* Reenable RTC updates and interrupts. */
1008         writertc(RTC_STATUSB, rtc_statusb);
1009         crit_exit();
1010 }
1011
1012 static int
1013 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1014 {
1015         sysclock_t base;
1016         long lastcnt;
1017
1018         /*
1019          * Following code assumes the 8254 is the cpu timer,
1020          * so make sure it is.
1021          */
1022         KKASSERT(sys_cputimer == &i8254_cputimer);
1023         KKASSERT(cti == &i8254_cputimer_intr);
1024
1025         lastcnt = get_interrupt_counter(irq, mycpuid);
1026
1027         /*
1028          * Force an 8254 Timer0 interrupt and wait 1/100s for
1029          * it to happen, then see if we got it.
1030          */
1031         kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1032
1033         i8254_intr_reload(cti, 2);
1034         base = sys_cputimer->count();
1035         while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1036                 ; /* nothing */
1037
1038         if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1039                 return ENOENT;
1040         return 0;
1041 }
1042
1043 /*
1044  * Start both clocks running.  DragonFly note: the stat clock is no longer
1045  * used.  Instead, 8254 based systimers are used for all major clock
1046  * interrupts.
1047  */
1048 static void
1049 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1050 {
1051         void *clkdesc = NULL;
1052         int irq = 0, mixed_mode = 0, error;
1053
1054         KKASSERT(mycpuid == 0);
1055         callout_init_mp(&sysbeepstop_ch);
1056
1057         if (!selected && i8254_intr_disable)
1058                 goto nointr;
1059
1060         /*
1061          * The stat interrupt mask is different without the
1062          * statistics clock.  Also, don't set the interrupt
1063          * flag which would normally cause the RTC to generate
1064          * interrupts.
1065          */
1066         rtc_statusb = RTCSB_24HR;
1067
1068         /* Finish initializing 8254 timer 0. */
1069         if (ioapic_enable) {
1070                 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1071                         INTR_POLARITY_HIGH);
1072                 if (irq < 0) {
1073 mixed_mode_setup:
1074                         error = ioapic_conf_legacy_extint(0);
1075                         if (!error) {
1076                                 irq = machintr_legacy_intr_find(0,
1077                                     INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1078                                 if (irq < 0)
1079                                         error = ENOENT;
1080                         }
1081
1082                         if (error) {
1083                                 if (!selected) {
1084                                         kprintf("IOAPIC: setup mixed mode for "
1085                                                 "irq 0 failed: %d\n", error);
1086                                         goto nointr;
1087                                 } else {
1088                                         panic("IOAPIC: setup mixed mode for "
1089                                               "irq 0 failed: %d\n", error);
1090                                 }
1091                         }
1092                         mixed_mode = 1;
1093                 }
1094                 clkdesc = register_int(irq, clkintr, NULL, "clk",
1095                                        NULL,
1096                                        INTR_EXCL | INTR_CLOCK |
1097                                        INTR_NOPOLL | INTR_MPSAFE |
1098                                        INTR_NOENTROPY, 0);
1099         } else {
1100                 register_int(0, clkintr, NULL, "clk", NULL,
1101                              INTR_EXCL | INTR_CLOCK |
1102                              INTR_NOPOLL | INTR_MPSAFE |
1103                              INTR_NOENTROPY, 0);
1104         }
1105
1106         /* Initialize RTC. */
1107         writertc(RTC_STATUSA, rtc_statusa);
1108         writertc(RTC_STATUSB, RTCSB_24HR);
1109
1110         if (ioapic_enable) {
1111                 error = i8254_ioapic_trial(irq, cti);
1112                 if (error) {
1113                         if (mixed_mode) {
1114                                 if (!selected) {
1115                                         kprintf("IOAPIC: mixed mode for irq %d "
1116                                                 "trial failed: %d\n",
1117                                                 irq, error);
1118                                         goto nointr;
1119                                 } else {
1120                                         panic("IOAPIC: mixed mode for irq %d "
1121                                               "trial failed: %d\n", irq, error);
1122                                 }
1123                         } else {
1124                                 kprintf("IOAPIC: warning 8254 is not connected "
1125                                         "to the correct pin, try mixed mode\n");
1126                                 unregister_int(clkdesc, 0);
1127                                 goto mixed_mode_setup;
1128                         }
1129                 }
1130         }
1131         return;
1132
1133 nointr:
1134         i8254_nointr = 1; /* don't try to register again */
1135         cputimer_intr_deregister(cti);
1136 }
1137
1138 void
1139 setstatclockrate(int newhz)
1140 {
1141         if (newhz == RTC_PROFRATE)
1142                 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1143         else
1144                 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1145         writertc(RTC_STATUSA, rtc_statusa);
1146 }
1147
#if 0
/*
 * Compiled out: timecounter read hook that returns the current TSC value.
 * The tc argument is not used.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return rdtsc();
}
#endif
1155
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384

/* Ring of (low 32 bits of TSC, caller tag) pairs, exported read-only. */
static u_long tsc[KERN_TIMESTAMP_SIZE];
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");

/*
 * Append one timestamp record tagged with x.  Records take two slots; the
 * write position wraps to the start once it reaches the end of the ring,
 * and the slot following the newest record is zeroed to mark the end.
 */
void
_TSTMP(u_int32_t x)
{
	static int slot;
	u_long *pair = &tsc[slot];

	pair[0] = (u_int32_t)rdtsc();
	pair[1] = x;

	slot += 2;
	if (slot >= KERN_TIMESTAMP_SIZE)
		slot = 0;
	tsc[slot] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1174
1175 /*
1176  *
1177  */
1178
1179 static int
1180 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1181 {
1182     sysclock_t count;
1183     __uint64_t tscval;
1184     char buf[32];
1185
1186     crit_enter();
1187     if (sys_cputimer == &i8254_cputimer)
1188         count = sys_cputimer->count();
1189     else
1190         count = 0;
1191     if (tsc_present)
1192         tscval = rdtsc();
1193     else
1194         tscval = 0;
1195     crit_exit();
1196     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1197     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1198 }
1199
1200 static uint64_t         tsc_mpsync_target;
1201
1202 static void
1203 tsc_mpsync_test_remote(void *arg __unused)
1204 {
1205         uint64_t tsc;
1206
1207         tsc = rdtsc();
1208         if (tsc < tsc_mpsync_target)
1209                 tsc_mpsync = 0;
1210 }
1211
1212 static void
1213 tsc_mpsync_test(void)
1214 {
1215         struct globaldata *gd = mycpu;
1216         uint64_t test_end, test_begin;
1217         u_int i;
1218
1219         if (!tsc_invariant) {
1220                 /* Not even invariant TSC */
1221                 return;
1222         }
1223
1224         if (ncpus == 1) {
1225                 /* Only one CPU */
1226                 tsc_mpsync = 1;
1227                 return;
1228         }
1229
1230         /*
1231          * Forcing can be used w/qemu to reduce contention
1232          */
1233         TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1234         if (tsc_mpsync) {
1235                 kprintf("TSC as cputimer forced\n");
1236                 return;
1237         }
1238
1239         if (cpu_vendor_id != CPU_VENDOR_INTEL) {
1240                 /* XXX only Intel works */
1241                 return;
1242         }
1243
1244         kprintf("TSC testing MP synchronization ...\n");
1245         tsc_mpsync = 1;
1246
1247         /* Run test for 100ms */
1248         test_begin = rdtsc();
1249         test_end = test_begin + (tsc_frequency / 10);
1250
1251 #define TSC_TEST_TRYMAX         1000000 /* Make sure we could stop */
1252
1253         for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
1254                 struct lwkt_cpusync cs;
1255
1256                 crit_enter();
1257                 lwkt_cpusync_init(&cs, gd->gd_other_cpus,
1258                                   tsc_mpsync_test_remote, NULL);
1259                 lwkt_cpusync_interlock(&cs);
1260                 tsc_mpsync_target = rdtsc();
1261                 cpu_mfence();
1262                 lwkt_cpusync_deinterlock(&cs);
1263                 crit_exit();
1264
1265                 if (!tsc_mpsync) {
1266                         kprintf("TSC is not MP synchronized @%u\n", i);
1267                         break;
1268                 }
1269                 if (tsc_mpsync_target > test_end)
1270                         break;
1271         }
1272
1273 #undef TSC_TEST_TRYMAX
1274
1275         if (tsc_mpsync) {
1276                 if (tsc_mpsync_target == test_begin) {
1277                         kprintf("TSC does not tick?!");
1278                         /* XXX disable TSC? */
1279                         tsc_invariant = 0;
1280                         tsc_mpsync = 0;
1281                         return;
1282                 }
1283
1284                 kprintf("TSC is MP synchronized");
1285                 if (bootverbose)
1286                         kprintf(", after %u tries", i);
1287                 kprintf("\n");
1288         }
1289 }
1290 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1291
1292 #define TSC_CPUTIMER_FREQMAX    128000000       /* 128Mhz */
1293
1294 static int tsc_cputimer_shift;
1295
1296 static void
1297 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1298 {
1299         timer->base = 0;
1300         timer->base = oldclock - tsc_cputimer_count();
1301 }
1302
1303 static sysclock_t
1304 tsc_cputimer_count(void)
1305 {
1306         uint64_t tsc;
1307
1308         tsc = rdtsc();
1309         tsc >>= tsc_cputimer_shift;
1310
1311         return (tsc + tsc_cputimer.base);
1312 }
1313
1314 static void
1315 tsc_cputimer_register(void)
1316 {
1317         uint64_t freq;
1318         int enable = 1;
1319
1320         if (!tsc_mpsync)
1321                 return;
1322
1323         TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
1324         if (!enable)
1325                 return;
1326
1327         freq = tsc_frequency;
1328         while (freq > TSC_CPUTIMER_FREQMAX) {
1329                 freq >>= 1;
1330                 ++tsc_cputimer_shift;
1331         }
1332         kprintf("TSC: cputimer freq %ju, shift %d\n",
1333             (uintmax_t)freq, tsc_cputimer_shift);
1334
1335         tsc_cputimer.freq = freq;
1336
1337         cputimer_register(&tsc_cputimer);
1338         cputimer_select(&tsc_cputimer, 0);
1339 }
1340 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
1341         tsc_cputimer_register, NULL);
1342
1343 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
1344 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
1345             "frequency");
1346 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
1347             0, 0, hw_i8254_timestamp, "A", "");
1348
1349 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
1350             &tsc_present, 0, "TSC Available");
1351 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
1352             &tsc_invariant, 0, "Invariant TSC");
1353 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
1354             &tsc_mpsync, 0, "TSC is synchronized across CPUs");
1355 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
1356             &tsc_frequency, 0, "TSC Frequency");