/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $
 * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.7 2003/06/20 02:09:50 dillon Exp $
 */

#include "npx.h"
#include "opt_user_ldt.h"

#include <sys/rtprio.h>

#include <machine/asmacros.h>
#include <machine/ipl.h>

#ifdef SMP
#include <machine/pmap.h>
#include <machine/smptests.h>		/** GRAB_LOPRIO */
#include <machine/apic.h>
#include <machine/lock.h>
#endif /* SMP */

#include "assym.s"
	.data

	.globl	_panic

#if defined(SWTCH_OPTIM_STATS)
	.globl	_swtch_optim_stats, _tlb_flush_count
_swtch_optim_stats:	.long	0		/* number of _swtch_optims */
_tlb_flush_count:	.long	0
#endif

	.text

/*
 * cpu_heavy_switch(next_thread)
 *
 *	Switch from the current thread to a new thread.  This entry
 *	is normally called via the thread->td_switch function, and will
 *	only be called when the current thread is a heavy weight process.
 *
 *	YYY disable interrupts once giant is removed.
 */
ENTRY(cpu_heavy_switch)
	movl	_curthread,%ecx
	movl	TD_PROC(%ecx),%ecx

8ad65e08 83 cli
984263bc
MD
84#ifdef SMP
85 movb P_ONCPU(%ecx), %al /* save "last" cpu */
86 movb %al, P_LASTCPU(%ecx)
87 movb $0xff, P_ONCPU(%ecx) /* "leave" the cpu */
88#endif /* SMP */
89 movl P_VMSPACE(%ecx), %edx
90#ifdef SMP
91 movl _cpuid, %eax
92#else
93 xorl %eax, %eax
94#endif /* SMP */
95 btrl %eax, VM_PMAP+PM_ACTIVE(%edx)
96
8ad65e08
MD
97 /*
98 * Save general regs
99 */
100 movl P_THREAD(%ecx),%edx
b7c628e4 101 movl TD_PCB(%edx),%edx
984263bc
MD
102 movl (%esp),%eax /* Hardware registers */
103 movl %eax,PCB_EIP(%edx)
104 movl %ebx,PCB_EBX(%edx)
105 movl %esp,PCB_ESP(%edx)
106 movl %ebp,PCB_EBP(%edx)
107 movl %esi,PCB_ESI(%edx)
108 movl %edi,PCB_EDI(%edx)
109 movl %gs,PCB_GS(%edx)
110
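	/*
	 * (The PCB_EIP value saved above is the return address of the
	 * call into cpu_heavy_switch(), taken from (%esp);
	 * cpu_heavy_restore() eventually returns through it.)
	 */
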
	/*
	 * Push the LWKT switch restore function, which resumes a heavy
	 * weight process.  Note that the LWKT switcher is based on
	 * TD_SP, while the heavy weight process switcher is based on
	 * PCB_ESP.  TD_SP is usually one pointer pushed relative to
	 * PCB_ESP.
	 */
	movl	P_THREAD(%ecx),%eax
	pushl	$cpu_heavy_restore
	movl	%esp,TD_SP(%eax)

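	/*
	 * The thread's saved stack now looks like this (a sketch, stack
	 * growing down):
	 *
	 *	TD_SP   ->	&cpu_heavy_restore	(switch-restore function)
	 *	PCB_ESP ->	return %eip		(also saved as PCB_EIP)
	 *			caller's frame ...
	 */
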
	/*
	 * Save debug regs if necessary
	 */
	movb	PCB_FLAGS(%edx),%al
	andb	$PCB_DBREGS,%al
	jz	1f				/* no, skip over */
	movl	%dr7,%eax			/* yes, do the save */
	movl	%eax,PCB_DR7(%edx)
	andl	$0x0000fc00, %eax		/* disable all watchpoints */
	movl	%eax,%dr7
	movl	%dr6,%eax
	movl	%eax,PCB_DR6(%edx)
	movl	%dr3,%eax
	movl	%eax,PCB_DR3(%edx)
	movl	%dr2,%eax
	movl	%eax,PCB_DR2(%edx)
	movl	%dr1,%eax
	movl	%eax,PCB_DR1(%edx)
	movl	%dr0,%eax
	movl	%eax,PCB_DR0(%edx)
1:

	/*
	 * Save BGL nesting count.  Note that we hold the BGL with a
	 * count of at least 1 on entry to cpu_heavy_switch().
	 */
#ifdef SMP
	movl	_mp_lock, %eax
	/* XXX FIXME: we should be saving the local APIC TPR */
#ifdef DIAGNOSTIC
	cmpl	$FREE_LOCK, %eax		/* is it free? */
	je	badsw4				/* yes, bad medicine! */
#endif /* DIAGNOSTIC */
	andl	$COUNT_FIELD, %eax		/* clear CPU portion */
	movl	%eax, PCB_MPNEST(%edx)		/* store it */
#endif /* SMP */

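	/*
	 * (As the COUNT_FIELD mask above and the _cpu_lockid merge in
	 * cpu_heavy_restore() suggest, _mp_lock packs the owning cpu id
	 * together with the nesting count; only the count is saved in
	 * the pcb.)
	 */
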
	/*
	 * Save the FP state if we have used the FP.
	 */
#if NNPX > 0
	movl	P_THREAD(%ecx),%ecx
	cmpl	%ecx,_npxthread
	jne	1f
	addl	$PCB_SAVEFPU,%edx		/* h/w bugs make saving complicated */
	pushl	%edx
	call	_npxsave			/* do it in a big C function */
	popl	%eax
1:
	/* %ecx,%edx trashed */
#endif /* NNPX > 0 */

	/*
	 * Switch to the next thread, which was passed as an argument
	 * to cpu_heavy_switch().  Due to the switch-restore function we
	 * pushed, the argument is at 8(%esp).  Set the current thread,
	 * load the stack pointer, and 'ret' into the switch-restore
	 * function.
	 */
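	/*
	 * (All of the cpu_*_restore functions in this file are entered
	 * with the new thread in %eax, so the movl into %eax below is
	 * what satisfies that convention.)
	 */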
	movl	8(%esp),%eax
	movl	%eax,_curthread
	movl	TD_SP(%eax),%esp
	ret

/*
 * cpu_exit_switch()
 *
 *	The switch function is changed to this when a thread is going away
 *	for good.  We have to ensure that the MMU state is not cached, and
 *	we don't bother saving the existing thread state before switching.
 */
ENTRY(cpu_exit_switch)
	movl	_IdlePTD,%ecx
	movl	%cr3,%eax
	cmpl	%ecx,%eax
	je	1f
	movl	%ecx,%cr3
1:
	cli
	movl	4(%esp),%eax
	movl	%eax,_curthread
	movl	TD_SP(%eax),%esp
	ret

/*
 * cpu_heavy_restore()	(current thread in %eax on entry)
 *
 *	Restore the thread after an LWKT switch.  This entry is normally
 *	called via the LWKT switch restore function, which was pulled
 *	off the thread stack and jumped to.
 *
 *	This entry is only called if the thread was previously saved
 *	using cpu_heavy_switch() (the heavy weight process thread switcher).
 *
 *	YYY theoretically we do not have to restore everything here, a lot
 *	of this junk can wait until we return to usermode.  But for now
 *	we restore everything.
 *
 *	YYY STI/CLI sequencing.
 */
ENTRY(cpu_heavy_restore)
	/* interrupts are disabled */
	movl	TD_PCB(%eax),%edx
	movl	TD_PROC(%eax),%ecx
#ifdef DIAGNOSTIC
	cmpb	$SRUN,P_STAT(%ecx)
	jne	badsw2
#endif

#if defined(SWTCH_OPTIM_STATS)
	incl	_swtch_optim_stats
#endif
	/*
	 * Restore the MMU address space
	 */
	movl	%cr3,%ebx
	cmpl	PCB_CR3(%edx),%ebx
	je	4f
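	/*
	 * (Reloading %cr3 flushes the TLB, so the reload is skipped when
	 * the pcb's page directory is already installed; the reload path
	 * is what _tlb_flush_count counts.)
	 */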
#if defined(SWTCH_OPTIM_STATS)
	decl	_swtch_optim_stats
	incl	_tlb_flush_count
#endif
	movl	PCB_CR3(%edx),%ebx
	movl	%ebx,%cr3
4:

	/*
	 * Deal with the PCB extension, restore the private tss
	 */
#ifdef SMP
	movl	_cpuid, %esi
#else
	xorl	%esi, %esi
#endif
	cmpl	$0, PCB_EXT(%edx)		/* has pcb extension? */
	je	1f
	btsl	%esi, _private_tss		/* mark use of private tss */
	movl	PCB_EXT(%edx), %edi		/* new tss descriptor */
	jmp	2f
1:

	/*
	 * Update the common_tss.tss_esp0 pointer.  This is the supervisor
	 * stack pointer on entry from user mode.  Since the pcb is at the
	 * top of the supervisor stack, esp0 starts just below it.  We
	 * leave enough space for vm86 (16 bytes).
	 *
	 * common_tss.tss_esp0 is needed when user mode traps into the
	 * kernel.
	 */
	leal	-16(%edx),%ebx
	movl	%ebx, _common_tss + TSS_ESP0

	btrl	%esi, _private_tss
	jae	3f
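	/*
	 * (btrl clears our cpu's bit in _private_tss and leaves its old
	 * value in CF; jae, i.e. jnc, therefore skips the tss reload when
	 * this cpu was already using the common tss.)
	 */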
#ifdef SMP
	movl	$gd_common_tssd, %edi
	addl	%fs:0, %edi
#else
	movl	$_common_tssd, %edi
#endif
	/*
	 * Move the correct TSS descriptor into the GDT slot, then reload
	 * %tr.  Loading %tr marks the descriptor busy, and a busy TSS
	 * cannot be loaded again, so the GDT slot must be refreshed from
	 * the saved (available-type) copy before the ltr.
	 */
2:
	movl	_tss_gdt, %ebx			/* entry in GDT */
	movl	0(%edi), %eax
	movl	%eax, 0(%ebx)
	movl	4(%edi), %eax
	movl	%eax, 4(%ebx)
	movl	$GPROC0_SEL*8, %esi		/* GSEL(entry, SEL_KPL) */
	ltr	%si

	/*
	 * Tell the pmap that our cpu is using the VMSPACE now.
	 */
3:
	movl	P_VMSPACE(%ecx), %ebx
#ifdef SMP
	movl	_cpuid, %eax
#else
	xorl	%eax, %eax
#endif
	btsl	%eax, VM_PMAP+PM_ACTIVE(%ebx)

	/*
	 * Restore general registers.
	 */
	movl	PCB_EBX(%edx),%ebx
	movl	PCB_ESP(%edx),%esp
	movl	PCB_EBP(%edx),%ebp
	movl	PCB_ESI(%edx),%esi
	movl	PCB_EDI(%edx),%edi
	movl	PCB_EIP(%edx),%eax
	movl	%eax,(%esp)
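	/*
	 * (PCB_EIP is written over the return-address slot of the just-
	 * restored stack, so the ret at the end of this function resumes
	 * execution after the original call into cpu_heavy_switch().)
	 */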

	/*
	 * SMP ickyness to direct interrupts.
	 */

#ifdef SMP
#ifdef GRAB_LOPRIO				/* hold LOPRIO for INTs */
#ifdef CHEAP_TPR
	movl	$0, lapic_tpr
#else
	andl	$~APIC_TPR_PRIO, lapic_tpr
#endif /** CHEAP_TPR */
#endif /** GRAB_LOPRIO */
	movl	_cpuid,%eax
	movb	%al, P_ONCPU(%ecx)
#endif /* SMP */

	/*
	 * Restore the BGL nesting count.  Note that the nesting count will
	 * be at least 1.
	 */
#ifdef SMP
	movl	_cpu_lockid, %eax
	orl	PCB_MPNEST(%edx), %eax		/* add nest count from PROC */
	movl	%eax, _mp_lock			/* load the mp_lock */
	/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */

	/*
	 * Restore the user LDT if we have one
	 */
#ifdef USER_LDT
	cmpl	$0, PCB_USERLDT(%edx)
	jnz	1f
	movl	__default_ldt,%eax
	cmpl	_currentldt,%eax
	je	2f
	lldt	__default_ldt
	movl	%eax,_currentldt
	jmp	2f
1:	pushl	%edx
	call	_set_user_ldt
	popl	%edx
2:
#endif
	/*
	 * Restore the %gs segment register, which must be done after
	 * loading the user LDT.  Since user processes can modify the
	 * register via procfs, this may result in a fault.  The fault
	 * is detected in i386/i386/trap.c by checking the fault address
	 * against cpu_switch_load_gs.
	 */
	.globl	cpu_switch_load_gs
cpu_switch_load_gs:
	movl	PCB_GS(%edx),%gs

	/*
	 * Restore the DEBUG register state if necessary.
	 */
	movb	PCB_FLAGS(%edx),%al
	andb	$PCB_DBREGS,%al
	jz	1f				/* no, skip over */
	movl	PCB_DR6(%edx),%eax		/* yes, do the restore */
	movl	%eax,%dr6
	movl	PCB_DR3(%edx),%eax
	movl	%eax,%dr3
	movl	PCB_DR2(%edx),%eax
	movl	%eax,%dr2
	movl	PCB_DR1(%edx),%eax
	movl	%eax,%dr1
	movl	PCB_DR0(%edx),%eax
	movl	%eax,%dr0
	movl	%dr7,%eax			/* load dr7 so as not to disturb */
	andl	$0x0000fc00,%eax		/* reserved bits */
	pushl	%ebx
	movl	PCB_DR7(%edx),%ebx
	andl	$~0x0000fc00,%ebx
	orl	%ebx,%eax
	popl	%ebx
	movl	%eax,%dr7
1:
#if 0
	/*
	 * Remove the heavy weight process from the heavy weight queue.
	 * This will also have the side effect of removing the thread from
	 * the run queue.  YYY temporary?
	 *
	 * LWKT threads stay on the run queue until explicitly removed.
	 */
	pushl	%ecx
	call	remrunqueue
	addl	$4,%esp
#endif

	sti					/* XXX */
	ret

CROSSJUMPTARGET(sw1a)

#ifdef DIAGNOSTIC
badsw1:
	pushl	$sw0_1
	call	_panic

sw0_1:	.asciz	"cpu_switch: has wchan"

badsw2:
	pushl	$sw0_2
	call	_panic

sw0_2:	.asciz	"cpu_switch: not SRUN"
#endif

#if defined(SMP) && defined(DIAGNOSTIC)
badsw4:
	pushl	$sw0_4
	call	_panic

sw0_4:	.asciz	"cpu_switch: do not have lock"
#endif /* SMP && DIAGNOSTIC */

/*
 * savectx(pcb)
 *	Update pcb, saving current processor state.
 */
ENTRY(savectx)
	/* fetch PCB */
	movl	4(%esp),%ecx

	/* caller's return address - child won't execute this routine */
	movl	(%esp),%eax
	movl	%eax,PCB_EIP(%ecx)

	movl	%cr3,%eax
	movl	%eax,PCB_CR3(%ecx)

	movl	%ebx,PCB_EBX(%ecx)
	movl	%esp,PCB_ESP(%ecx)
	movl	%ebp,PCB_EBP(%ecx)
	movl	%esi,PCB_ESI(%ecx)
	movl	%edi,PCB_EDI(%ecx)
	movl	%gs,PCB_GS(%ecx)

#if NNPX > 0
	/*
	 * If npxthread == NULL, then the npx h/w state is irrelevant and the
	 * state had better already be in the pcb.  This is true for forks
	 * but not for dumps (the old book-keeping with FP flags in the pcb
	 * always lost for dumps because the dump pcb has 0 flags).
	 *
	 * If npxthread != NULL, then we have to save the npx h/w state to
	 * npxthread's pcb and copy it to the requested pcb, or save to the
	 * requested pcb and reload.  Copying is easier because we would
	 * have to handle h/w bugs for reloading.  We used to lose the
	 * parent's npx state for forks by forgetting to reload.
	 */
	movl	_npxthread,%eax
	testl	%eax,%eax
	je	1f

	pushl	%ecx
	movl	TD_PCB(%eax),%eax
	leal	PCB_SAVEFPU(%eax),%eax
	pushl	%eax
	pushl	%eax
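	/*
	 * (The save-area pointer is pushed twice: the top copy is
	 * npxsave()'s argument and is discarded by the addl below, while
	 * the duplicate survives the call and is popped back into %eax
	 * as the bcopy source.)
	 */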
	call	_npxsave
	addl	$4,%esp
	popl	%eax
	popl	%ecx

	pushl	$PCB_SAVEFPU_SIZE
	leal	PCB_SAVEFPU(%ecx),%ecx
	pushl	%ecx
	pushl	%eax
	call	_bcopy
	addl	$12,%esp
#endif /* NNPX > 0 */

1:
	ret

/*
 * cpu_idle_restore()	(current thread in %eax on entry)
 *
 *	Don't bother setting up any regs other than %ebp so backtraces
 *	don't die.  This restore function is used to bootstrap into the
 *	cpu_idle() LWKT only, after that cpu_lwkt_*() will be used for
 *	switching.
 */
ENTRY(cpu_idle_restore)
	movl	$0,%ebp
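	/*
	 * (%ebp is zeroed and a NULL return address is pushed so that a
	 * backtrace through the idle thread terminates cleanly; cpu_idle()
	 * is entered via jmp and is not expected to return.)
	 */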
	pushl	$0
	jmp	cpu_idle

/*
 * cpu_lwkt_switch()
 *
 *	Standard LWKT switching function.  Only non-scratch registers are
 *	saved and we don't bother with the MMU state or anything else.
 *	YYY BGL, SPL
 */
ENTRY(cpu_lwkt_switch)
	movl	4(%esp),%eax
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	pushfl
	movl	_curthread,%ecx
	pushl	$cpu_lwkt_restore
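	/*
	 * The frame about to be recorded in TD_SP looks like this (a
	 * sketch, stack growing down):
	 *
	 *	TD_SP ->	&cpu_lwkt_restore	(switch-restore function)
	 *			eflags
	 *			%edi, %esi, %ebx, %ebp
	 *			return %eip
	 *
	 * cpu_lwkt_restore() pops these back off in the same order.
	 */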
	cli
	movl	%esp,TD_SP(%ecx)
	movl	%eax,_curthread
	movl	TD_SP(%eax),%esp
	ret

/*
 * cpu_lwkt_restore()	(current thread in %eax on entry)
 *
 *	Standard LWKT restore function.  Pops the registers that
 *	cpu_lwkt_switch() pushed and returns to the switched-in
 *	thread's caller.
 */
ENTRY(cpu_lwkt_restore)
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret