MP Implementation 1/2: Get the APIC code working again, sweetly integrate the
[dragonfly.git] sys/i386/i386/swtch.s
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 * LWKT threads Copyright (c) 2003 Matthew Dillon
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $
 * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.21 2003/07/06 21:23:48 dillon Exp $
 */

#include "npx.h"
#include "opt_user_ldt.h"

#include <sys/rtprio.h>

#include <machine/asmacros.h>
#include <machine/ipl.h>

#ifdef SMP
#include <machine/pmap.h>
#include <machine/smptests.h>           /** GRAB_LOPRIO */
#include <machine/apic.h>
#include <machine/lock.h>
#endif /* SMP */

#include "assym.s"

        .data

        .globl  panic

#if defined(SWTCH_OPTIM_STATS)
        .globl  swtch_optim_stats, tlb_flush_count
swtch_optim_stats:      .long   0       /* number of _swtch_optims */
tlb_flush_count:        .long   0
#endif

        .text

/*
 * cpu_heavy_switch(next_thread)
 *
 * Switch from the current thread to a new thread.  This entry
 * is normally called via the thread->td_switch function, and will
 * only be called when the current thread is a heavy weight process.
 *
 * YYY disable interrupts once giant is removed.
 */
ENTRY(cpu_heavy_switch)
        movl    PCPU(curthread),%ecx
        movl    TD_PROC(%ecx),%ecx

        cli
        movl    P_VMSPACE(%ecx), %edx
        movl    PCPU(cpuid), %eax
        btrl    %eax, VM_PMAP+PM_ACTIVE(%edx)

        /*
         * Save general regs
         */
        movl    P_THREAD(%ecx),%edx
        movl    TD_PCB(%edx),%edx
        movl    (%esp),%eax             /* Hardware registers */
        movl    %eax,PCB_EIP(%edx)
        movl    %ebx,PCB_EBX(%edx)
        movl    %esp,PCB_ESP(%edx)
        movl    %ebp,PCB_EBP(%edx)
        movl    %esi,PCB_ESI(%edx)
        movl    %edi,PCB_EDI(%edx)
        movl    %gs,PCB_GS(%edx)

        /*
         * Push the LWKT switch restore function, which resumes a heavy
         * weight process.  Note that the LWKT switcher is based on
         * TD_SP, while the heavy weight process switcher is based on
         * PCB_ESP.  TD_SP is usually one pointer pushed relative to
         * PCB_ESP.
         */
        movl    P_THREAD(%ecx),%eax
        pushl   $cpu_heavy_restore
        movl    %esp,TD_SP(%eax)

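        /*
         * Rough picture of the old thread's stack at this point
         * (higher address first):
         *
         *      PCB_ESP ->      caller's return address (saved in PCB_EIP)
         *      TD_SP   ->      &cpu_heavy_restore (just pushed)
         *
         * i.e. TD_SP sits one pushed pointer below PCB_ESP, as noted above.
         */
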
        /*
         * Save debug regs if necessary
         */
        movb    PCB_FLAGS(%edx),%al
        andb    $PCB_DBREGS,%al
        jz      1f                      /* no, skip over */
        movl    %dr7,%eax               /* yes, do the save */
        movl    %eax,PCB_DR7(%edx)
        andl    $0x0000fc00, %eax       /* disable all watchpoints */
        movl    %eax,%dr7
        movl    %dr6,%eax
        movl    %eax,PCB_DR6(%edx)
        movl    %dr3,%eax
        movl    %eax,PCB_DR3(%edx)
        movl    %dr2,%eax
        movl    %eax,PCB_DR2(%edx)
        movl    %dr1,%eax
        movl    %eax,PCB_DR1(%edx)
        movl    %dr0,%eax
        movl    %eax,PCB_DR0(%edx)
1:

        /*
         * Save the FP state if we have used the FP.
         */
#if NNPX > 0
        movl    P_THREAD(%ecx),%ecx
        cmpl    %ecx,PCPU(npxthread)
        jne     1f
        addl    $PCB_SAVEFPU,%edx       /* h/w bugs make saving complicated */
        pushl   %edx
        call    npxsave                 /* do it in a big C function */
        popl    %eax
1:
        /* %ecx,%edx trashed */
#endif  /* NNPX > 0 */

        /*
         * Switch to the next thread, which was passed as an argument
         * to cpu_heavy_switch().  Due to the switch-restore function we
         * pushed, the argument is at 8(%esp).  Set the current thread,
         * load the stack pointer, and 'ret' into the switch-restore
         * function.
         */
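        /*
         * Stack layout assumed by the 8(%esp) fetch below (rough sketch):
         *
         *      0(%esp)         &cpu_heavy_restore (pushed earlier)
         *      4(%esp)         caller's return address
         *      8(%esp)         next_thread argument
         */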
        movl    8(%esp),%eax
        movl    %eax,PCPU(curthread)
        movl    TD_SP(%eax),%esp
        ret

/*
 * cpu_exit_switch()
 *
 * The switch function is changed to this when a thread is going away
 * for good.  We have to ensure that the MMU state is not cached, and
 * we don't bother saving the existing thread state before switching.
 *
 * At this point we are in a critical section and this cpu owns the
 * thread's token, which serves as an interlock until the switchout is
 * complete.
 */
ENTRY(cpu_exit_switch)
        /*
         * Get us out of the vmspace
         */
        movl    IdlePTD,%ecx
        movl    %cr3,%eax
        cmpl    %ecx,%eax
        je      1f
        movl    %ecx,%cr3
1:
        movl    PCPU(curthread),%ecx
        /*
         * Switch to the next thread.
         */
        cli
        movl    4(%esp),%eax
        movl    %eax,PCPU(curthread)
        movl    TD_SP(%eax),%esp

        /*
         * We are now the next thread, set the exited flag and wakeup
         * any waiters.
         */
        orl     $TDF_EXITED,TD_FLAGS(%ecx)
#if 0   /* YYY MP lock may not be held by new target */
        pushl   %eax
        pushl   %ecx            /* wakeup(oldthread) */
        call    wakeup
        addl    $4,%esp
        popl    %eax            /* note: next thread expects curthread in %eax */
#endif

        /*
         * Restore the next thread's state and resume it.  Note: the
         * restore function assumes that the next thread's address is
         * in %eax.
         */
        ret

/*
 * cpu_heavy_restore()  (current thread in %eax on entry)
 *
 * Restore the thread after an LWKT switch.  This entry is normally
 * called via the LWKT switch restore function, which was pulled
 * off the thread stack and jumped to.
 *
 * This entry is only called if the thread was previously saved
 * using cpu_heavy_switch() (the heavy weight process thread switcher).
 *
 * YYY theoretically we do not have to restore everything here; a lot
 * of this junk can wait until we return to usermode.  But for now
 * we restore everything.
 *
 * YYY STI/CLI sequencing.
 *
 * YYY note: spl check is done in mi_switch when it splx()'s.
 */

ENTRY(cpu_heavy_restore)
        /* interrupts are disabled */
        movl    TD_PCB(%eax),%edx
        movl    TD_PROC(%eax),%ecx
#ifdef DIAGNOSTIC
        cmpb    $SRUN,P_STAT(%ecx)
        jne     badsw2
#endif

#if defined(SWTCH_OPTIM_STATS)
        incl    _swtch_optim_stats
#endif
        /*
         * Restore the MMU address space
         */
        movl    %cr3,%ebx
        cmpl    PCB_CR3(%edx),%ebx
        je      4f
#if defined(SWTCH_OPTIM_STATS)
        decl    _swtch_optim_stats
        incl    _tlb_flush_count
#endif
        movl    PCB_CR3(%edx),%ebx
        movl    %ebx,%cr3
4:

        /*
         * Deal with the PCB extension, restore the private tss
         */
        movl    PCPU(cpuid), %esi
        cmpl    $0, PCB_EXT(%edx)       /* has pcb extension? */
        je      1f
        btsl    %esi, private_tss       /* mark use of private tss */
        movl    PCB_EXT(%edx), %edi     /* new tss descriptor */
        jmp     2f
1:

        /*
         * Update the common_tss.tss_esp0 pointer.  This is the supervisor
         * stack pointer on entry from user mode.  Since the pcb is at the
         * top of the supervisor stack, esp0 starts just below it.  We
         * leave enough space for vm86 (16 bytes).
         *
         * common_tss.tss_esp0 is needed when user mode traps into the
         * kernel.
         */
        leal    -16(%edx),%ebx
        movl    %ebx, PCPU(common_tss) + TSS_ESP0

        btrl    %esi, private_tss
        jae     3f

        /*
         * There is no way to get the address of a segment-accessed variable
         * so we store a self-referential pointer at the base of the per-cpu
         * data area and add the appropriate offset.
         */
        movl    $gd_common_tssd, %edi
        addl    %fs:0, %edi

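        /*
         * Rough sketch of the trick above: %fs points at this cpu's
         * per-cpu data area, and the word at %fs:0 is assumed to hold
         * that area's own linear address.  $gd_common_tssd acts as the
         * symbol's offset within the area, so offset + %fs:0 yields a
         * plain linear address for gd_common_tssd that can be used
         * without a segment override.
         */
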
        /*
         * Move the correct TSS descriptor into the GDT slot, then reload
         * tr.   YYY not sure what is going on here
         */
2:
        movl    PCPU(tss_gdt), %ebx     /* entry in GDT */
        movl    0(%edi), %eax
        movl    %eax, 0(%ebx)
        movl    4(%edi), %eax
        movl    %eax, 4(%ebx)
        movl    $GPROC0_SEL*8, %esi     /* GSEL(entry, SEL_KPL) */
        ltr     %si

        /*
         * Tell the pmap that our cpu is using the VMSPACE now.
         */
3:
        movl    P_VMSPACE(%ecx), %ebx
        movl    PCPU(cpuid), %eax
        btsl    %eax, VM_PMAP+PM_ACTIVE(%ebx)

        /*
         * Restore general registers.
         */
        movl    PCB_EBX(%edx),%ebx
        movl    PCB_ESP(%edx),%esp
        movl    PCB_EBP(%edx),%ebp
        movl    PCB_ESI(%edx),%esi
        movl    PCB_EDI(%edx),%edi
        movl    PCB_EIP(%edx),%eax
        movl    %eax,(%esp)

        /*
         * Restore the user LDT if we have one
         */
#ifdef USER_LDT
        cmpl    $0, PCB_USERLDT(%edx)
        jnz     1f
        movl    _default_ldt,%eax
        cmpl    PCPU(currentldt),%eax
        je      2f
        lldt    _default_ldt
        movl    %eax,PCPU(currentldt)
        jmp     2f
1:      pushl   %edx
        call    set_user_ldt
        popl    %edx
2:
#endif
        /*
         * Restore the %gs segment register, which must be done after
         * loading the user LDT.  Since user processes can modify the
         * register via procfs, this may result in a fault which is
         * detected by checking the fault address against cpu_switch_load_gs
         * in i386/i386/trap.c
         */
        .globl  cpu_switch_load_gs
cpu_switch_load_gs:
        movl    PCB_GS(%edx),%gs

        /*
         * Restore the DEBUG register state if necessary.
         */
        movb    PCB_FLAGS(%edx),%al
        andb    $PCB_DBREGS,%al
        jz      1f                      /* no, skip over */
        movl    PCB_DR6(%edx),%eax      /* yes, do the restore */
        movl    %eax,%dr6
        movl    PCB_DR3(%edx),%eax
        movl    %eax,%dr3
        movl    PCB_DR2(%edx),%eax
        movl    %eax,%dr2
        movl    PCB_DR1(%edx),%eax
        movl    %eax,%dr1
        movl    PCB_DR0(%edx),%eax
        movl    %eax,%dr0
        movl    %dr7,%eax               /* load dr7 so as not to disturb */
        andl    $0x0000fc00,%eax        /* reserved bits */
        pushl   %ebx
        movl    PCB_DR7(%edx),%ebx
        andl    $~0x0000fc00,%ebx
        orl     %ebx,%eax
        popl    %ebx
        movl    %eax,%dr7
1:

        sti                             /* XXX */
        ret

CROSSJUMPTARGET(sw1a)

badsw0:
        pushl   %eax
        pushl   $sw0_1
        call    panic

sw0_1:  .asciz  "cpu_switch: panic: %p"

#ifdef DIAGNOSTIC
badsw1:
        pushl   $sw0_1
        call    panic

sw0_1:  .asciz  "cpu_switch: has wchan"

badsw2:
        pushl   $sw0_2
        call    panic

sw0_2:  .asciz  "cpu_switch: not SRUN"
#endif

#if defined(SMP) && defined(DIAGNOSTIC)
badsw4:
        pushl   $sw0_4
        call    panic

sw0_4:  .asciz  "cpu_switch: do not have lock"
#endif /* SMP && DIAGNOSTIC */

string: .asciz  "SWITCHING\n"

/*
 * savectx(pcb)
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
        /* fetch PCB */
        movl    4(%esp),%ecx

        /* caller's return address - child won't execute this routine */
        movl    (%esp),%eax
        movl    %eax,PCB_EIP(%ecx)

        movl    %cr3,%eax
        movl    %eax,PCB_CR3(%ecx)

        movl    %ebx,PCB_EBX(%ecx)
        movl    %esp,PCB_ESP(%ecx)
        movl    %ebp,PCB_EBP(%ecx)
        movl    %esi,PCB_ESI(%ecx)
        movl    %edi,PCB_EDI(%ecx)
        movl    %gs,PCB_GS(%ecx)

#if NNPX > 0
        /*
         * If npxthread == NULL, then the npx h/w state is irrelevant and the
         * state had better already be in the pcb.  This is true for forks
         * but not for dumps (the old book-keeping with FP flags in the pcb
         * always lost for dumps because the dump pcb has 0 flags).
         *
         * If npxthread != NULL, then we have to save the npx h/w state to
         * npxthread's pcb and copy it to the requested pcb, or save to the
         * requested pcb and reload.  Copying is easier because we would
         * have to handle h/w bugs for reloading.  We used to lose the
         * parent's npx state for forks by forgetting to reload.
         */
        movl    PCPU(npxthread),%eax
        testl   %eax,%eax
        je      1f

        pushl   %ecx
        movl    TD_PCB(%eax),%eax
        leal    PCB_SAVEFPU(%eax),%eax
        pushl   %eax
        pushl   %eax
        call    npxsave
        addl    $4,%esp
        popl    %eax
        popl    %ecx

        pushl   $PCB_SAVEFPU_SIZE
        leal    PCB_SAVEFPU(%ecx),%ecx
        pushl   %ecx
        pushl   %eax
        call    bcopy
        addl    $12,%esp
#endif  /* NNPX > 0 */

1:
        ret

/*
 * cpu_idle_restore()   (current thread in %eax on entry)
 *
 * Don't bother setting up any regs other than %ebp so backtraces
 * don't die.  This restore function is used to bootstrap into the
 * cpu_idle() LWKT only; after that cpu_lwkt_*() will be used for
 * switching.
 *
 * If we are an AP we have to call ap_init() before jumping to
 * cpu_idle().  ap_init() will synchronize with the BP and finish
 * setting up various ncpu-dependent globaldata fields.  This may
 * happen on UP as well as SMP if we happen to be simulating multiple
 * cpus.
 */
ENTRY(cpu_idle_restore)
        movl    $0,%ebp
        pushl   $0
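        /*
         * (The zeroed %ebp and the pushed 0 presumably act as a NULL
         * frame pointer / dummy return address so backtraces terminate
         * cleanly here, per the header comment above.)
         */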
#ifdef SMP
        cmpl    $0,PCPU(cpuid)
        je      1f
        call    ap_init
1:
#endif
        sti
        jmp     cpu_idle

/*
 * cpu_kthread_restore() (current thread in %eax on entry)
 *
 * Don't bother setting up any regs other than %ebp so backtraces
 * don't die.  This restore function is used to bootstrap into an
 * LWKT based kernel thread only.  cpu_lwkt_switch() will be used
 * after this.
 *
 * Since all of our context is on the stack we are reentrant and
 * we can release our critical section and enable interrupts early.
 */
ENTRY(cpu_kthread_restore)
        movl    TD_PCB(%eax),%ebx
        movl    $0,%ebp
        subl    $TDPRI_CRIT,TD_PRI(%eax)
        sti
        popl    %edx                    /* kthread exit function */
        pushl   PCB_EBX(%ebx)           /* argument to ESI function */
        pushl   %edx                    /* set exit func as return address */
        movl    PCB_ESI(%ebx),%eax
        jmp     *%eax
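
        /*
         * Rough picture of the frame handed to the kthread function
         * (taken from PCB_ESI) at the jmp above:
         *
         *      4(%esp)  PCB_EBX        argument to the function
         *      0(%esp)  exit function  fake return address, so a plain
         *                              'ret' from the kthread function
         *                              falls into the exit function
         */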

/*
 * cpu_lwkt_switch()
 *
 * Standard LWKT switching function.  Only non-scratch registers are
 * saved and we don't bother with the MMU state or anything else.
 *
 * This function is always called while in a critical section.
 *
 * YYY BGL, SPL
 */
ENTRY(cpu_lwkt_switch)
        movl    4(%esp),%eax
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        pushfl
        movl    PCPU(curthread),%ecx
        pushl   $cpu_lwkt_restore
        cli
        movl    %esp,TD_SP(%ecx)
        movl    %eax,PCPU(curthread)
        movl    TD_SP(%eax),%esp
        ret
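
        /*
         * For reference, the frame saved at TD_SP by the pushes above
         * (rough sketch, lowest address first):
         *
         *      &cpu_lwkt_restore
         *      saved eflags
         *      saved %edi, %esi, %ebx, %ebp
         *      caller's return address
         *      next thread argument
         *
         * When a switch function later loads this TD_SP and executes
         * 'ret', control enters cpu_lwkt_restore, which pops the rest
         * in reverse order.
         */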

/*
 * cpu_lwkt_restore()   (current thread in %eax on entry)
 *
 * Standard LWKT restore function.  This function is always called
 * while in a critical section.
 *
 * Warning: due to preemption the restore function can be used to
 * 'return' to the original thread.  Interrupt disablement must be
 * protected through the switch so we cannot run splz here.
 */
ENTRY(cpu_lwkt_restore)
        popfl
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret