2 * Copyright (c) 1993 The Regents of the University of California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * $FreeBSD: src/sys/i386/i386/support.s,v 1.67.2.5 2001/08/15 01:23:50 peter Exp $
34 * $DragonFly: src/sys/platform/pc32/i386/support.s,v 1.4 2003/06/18 07:04:25 dillon Exp $
39 #include <machine/asmacros.h>
40 #include <machine/cputypes.h>
41 #include <machine/pmap.h>
42 #include <machine/specialreg.h>
58 .globl _copyout_vector
60 .long _generic_copyout
61 .globl _ovbcopy_vector
64 #if defined(I586_CPU) && NNPX > 0
74 * void bzero(void *buf, u_int len)
99 * do 64 byte chunks first
101 * XXX this is probably over-unrolled at least for DX2's
158 * a jump table seems to be faster than a loop or more range reductions
160 * XXX need a const section for non-text
195 #if defined(I586_CPU) && NNPX > 0
201 * The FPU register method is twice as fast as the integer register
202 * method unless the target is in the L1 cache and we pre-allocate a
203 * cache line for it (then the integer register method is 4-5 times
204 * faster). However, we never pre-allocate cache lines, since that
205 * would make the integer method 25% or more slower for the common
206 * case when the target isn't in either the L1 cache or the L2 cache.
207 * Thus we normally use the FPU register method unless the overhead
208 * would be too large.
210 cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */
214 * The FPU registers may belong to an application or to fastmove()
215 * or to another invocation of bcopy() or ourself in a higher level
216 * interrupt or trap handler. Preserving the registers is
217 * complicated since we avoid it if possible at all levels. We
218 * want to localize the complications even when that increases them.
219 * Here the extra work involves preserving CR0_TS in TS.
220 * `npxthread != NULL' is supposed to be the condition that all the
221 * FPU resources belong to an application, but npxthread and CR0_TS
222 * aren't set atomically enough for this condition to work in
223 * interrupt handlers.
225 * Case 1: FPU registers belong to the application: we must preserve
226 * the registers if we use them, so we only use the FPU register
227 * method if the target size is large enough to amortize the extra
228 * overhead for preserving them. CR0_TS must be preserved although
229 * it is very likely to end up as set.
231 * Case 2: FPU registers belong to fastmove(): fastmove() currently
232 * makes the registers look like they belong to an application so
233 * that cpu_switch() and savectx() don't have to know about it, so
234 * this case reduces to case 1.
236 * Case 3: FPU registers belong to the kernel: don't use the FPU
237 * register method. This case is unlikely, and supporting it would
238 * be more complicated and might take too much stack.
240 * Case 4: FPU registers don't belong to anyone: the FPU registers
241 * don't need to be preserved, so we always use the FPU register
242 * method. CR0_TS must be preserved although it is very likely to
243 * always end up as clear.
247 cmpl $256+184,%ecx /* empirical; not quite 2*108 more */
249 sarb $1,kernel_fpu_lock
258 sarb $1,kernel_fpu_lock
262 fninit /* XXX should avoid needing this */
267 * Align to an 8 byte boundary (misalignment in the main loop would
268 * cost a factor of >= 2). Avoid jumps (at little cost if it is
269 * already aligned) by always zeroing 8 bytes and using the part up
270 * to the _next_ alignment position.
273 addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */
279 * Similarly align `len' to a multiple of 8.
286 * This wouldn't be any faster if it were unrolled, since the loop
287 * control instructions are much faster than the fstl and/or done
288 * in parallel with it so their overhead is insignificant.
290 fpureg_i586_bzero_loop:
295 jae fpureg_i586_bzero_loop
302 movb $0xfe,kernel_fpu_lock
308 movb $0xfe,kernel_fpu_lock
313 * `rep stos' seems to be the best method in practice for small
314 * counts. Fancy methods usually take too long to start up due
315 * to cache and BTB misses.
335 #endif /* I586_CPU && NNPX > 0 */
387 /* fillw(pat, base, cnt) */
407 cmpl %ecx,%eax /* overlapping && src < dst? */
409 cld /* nope, copy forwards */
418 addl %ecx,%edi /* copy backwards. */
439 * generic_bcopy(src, dst, cnt)
440 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
451 cmpl %ecx,%eax /* overlapping && src < dst? */
454 shrl $2,%ecx /* copy by 32-bit words */
455 cld /* nope, copy forwards */
459 andl $3,%ecx /* any bytes left? */
468 addl %ecx,%edi /* copy backwards */
472 andl $3,%ecx /* any fractional bytes? */
476 movl 20(%esp),%ecx /* copy remainder by 32-bit words */
487 #if defined(I586_CPU) && NNPX > 0
497 cmpl %ecx,%eax /* overlapping && src < dst? */
503 sarb $1,kernel_fpu_lock
516 fninit /* XXX should avoid needing this */
521 #define DCACHE_SIZE 8192
522 cmpl $(DCACHE_SIZE-512)/2,%ecx
524 movl $(DCACHE_SIZE-512)/2,%ecx
528 jb 5f /* XXX should prefetch if %ecx >= 32 */
549 large_i586_bcopy_loop:
570 jae large_i586_bcopy_loop
582 movb $0xfe,kernel_fpu_lock
585 * This is a duplicate of the main part of generic_bcopy. See the comments
586 * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and
587 * would mess up high resolution profiling.
623 #endif /* I586_CPU && NNPX > 0 */
626 * Note: memcpy does not support overlapping copies
635 shrl $2,%ecx /* copy by 32-bit words */
636 cld /* nope, copy forwards */
640 andl $3,%ecx /* any bytes left? */
648 /*****************************************************************************/
649 /* copyout and fubyte family */
650 /*****************************************************************************/
652 * Access user memory from inside the kernel. These routines and possibly
653 * the math- and DOS emulators should be the only places that do this.
655 * We have to access the memory with user's permissions, so use a segment
656 * selector with RPL 3. For writes to user space we have to additionally
657 * check the PTE for write permission, because the 386 does not check
658 * write permissions when we are executing with EPL 0. The 486 does check
659 * this if the WP bit is set in CR0, so we can use a simpler version here.
661 * These routines set curpcb->onfault for the time they execute. When a
662 * protection violation occurs inside the functions, the trap handler
663 * returns to *curpcb->onfault instead of the function.
667 * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU)
673 ENTRY(generic_copyout)
675 movl $copyout_fault,PCB_ONFAULT(%eax)
682 testl %ebx,%ebx /* anything to do? */
686 * Check explicitly for non-user addresses. If 486 write protection
687 * is being used, this check is essential because we are in kernel
688 * mode so the h/w does not provide any protection against writing
693 * First, prevent address wrapping.
699 * XXX STOP USING VM_MAXUSER_ADDRESS.
700 * It is an end address, not a max, so every time it is used correctly it
701 * looks like there is an off by one error, and of course it caused an off
702 * by one error in several places.
704 cmpl $VM_MAXUSER_ADDRESS,%eax
707 #if defined(I386_CPU)
709 #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
710 cmpl $CPUCLASS_386,_cpu_class
714 * We have to check each PTE for user write permission.
715 * The checking may cause a page fault, so it is important to set
716 * up everything for return via copyout_fault before here.
718 /* compute number of pages */
723 shrl $IDXSHIFT+2,%ecx
726 /* compute PTE offset for start address */
732 /* check PTE for each page */
733 leal _PTmap(%edx),%eax
736 testb $PG_V,_PTmap(%eax) /* PTE page must be valid */
738 movb _PTmap(%edx),%al
739 andb $PG_V|PG_RW|PG_U,%al /* page must be valid and user writable */
740 cmpb $PG_V|PG_RW|PG_U,%al
744 /* simulate a trap */
749 call _trapwrite /* trapwrite(addr) */
754 testl %eax,%eax /* if not ok, return EFAULT */
760 jnz 1b /* check next page */
761 #endif /* I386_CPU */
763 /* bcopy(%esi, %edi, %ebx) */
767 #if defined(I586_CPU) && NNPX > 0
786 movl %eax,PCB_ONFAULT(%edx)
795 movl $0,PCB_ONFAULT(%edx)
799 #if defined(I586_CPU) && NNPX > 0
802 * Duplicated from generic_copyout. Could be done a bit better.
805 movl $copyout_fault,PCB_ONFAULT(%eax)
812 testl %ebx,%ebx /* anything to do? */
816 * Check explicitly for non-user addresses. If 486 write protection
817 * is being used, this check is essential because we are in kernel
818 * mode so the h/w does not provide any protection against writing
823 * First, prevent address wrapping.
829 * XXX STOP USING VM_MAXUSER_ADDRESS.
830 * It is an end address, not a max, so every time it is used correctly it
831 * looks like there is an off by one error, and of course it caused an off
832 * by one error in several places.
834 cmpl $VM_MAXUSER_ADDRESS,%eax
837 /* bcopy(%esi, %edi, %ebx) */
841 * End of duplicated code.
851 #endif /* I586_CPU && NNPX > 0 */
854 * copyin(from_user, to_kernel, len) - MP SAFE
860 ENTRY(generic_copyin)
862 movl $copyin_fault,PCB_ONFAULT(%eax)
865 movl 12(%esp),%esi /* caddr_t from */
866 movl 16(%esp),%edi /* caddr_t to */
867 movl 20(%esp),%ecx /* size_t len */
870 * make sure address is valid
875 cmpl $VM_MAXUSER_ADDRESS,%edx
878 #if defined(I586_CPU) && NNPX > 0
883 shrl $2,%ecx /* copy longword-wise */
888 andb $3,%cl /* copy remaining bytes */
892 #if defined(I586_CPU) && NNPX > 0
900 movl %eax,PCB_ONFAULT(%edx)
908 movl $0,PCB_ONFAULT(%edx)
912 #if defined(I586_CPU) && NNPX > 0
915 * Duplicated from generic_copyin. Could be done a bit better.
918 movl $copyin_fault,PCB_ONFAULT(%eax)
921 movl 12(%esp),%esi /* caddr_t from */
922 movl 16(%esp),%edi /* caddr_t to */
923 movl 20(%esp),%ecx /* size_t len */
926 * make sure address is valid
931 cmpl $VM_MAXUSER_ADDRESS,%edx
934 * End of duplicated code.
940 pushl %ebx /* XXX prepare for fastmove_fault */
945 #endif /* I586_CPU && NNPX > 0 */
947 #if defined(I586_CPU) && NNPX > 0
948 /* fastmove(src, dst, len)
951 len in %ecx XXX changed to on stack for profiling
952 uses %eax and %edx for tmp. storage
954 /* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
958 subl $PCB_SAVE87_SIZE+3*4,%esp
964 testl $7,%esi /* check if src addr is multiple of 8 */
967 testl $7,%edi /* check if dst addr is multiple of 8 */
970 /* if (npxthread != NULL) { */
973 /* fnsave(&curpcb->pcb_savefpu); */
975 fnsave PCB_SAVEFPU(%eax)
976 /* npxthread = NULL; */
980 /* now we own the FPU. */
983 * The process' FP state is saved in the pcb, but if we get
984 * switched, the cpu_switch() will store our FP state in the
985 * pcb. It should be possible to avoid all the copying for
986 * this, e.g., by setting a flag to tell cpu_switch() to
987 * save the state somewhere else.
989 /* tmp = curpcb->pcb_savefpu; */
995 addl $PCB_SAVEFPU,%esi
997 movl $PCB_SAVE87_SIZE>>2,%ecx
1003 /* stop_emulating(); */
1005 /* npxthread = curthread; */
1006 movl _curthread,%eax
1007 movl %eax,_npxthread
1009 movl $fastmove_fault,PCB_ONFAULT(%eax)
1066 /* curpcb->pcb_savefpu = tmp; */
1071 addl $PCB_SAVEFPU,%edi
1074 movl $PCB_SAVE87_SIZE>>2,%ecx
1081 /* start_emulating(); */
1085 /* npxthread = NULL; */
1091 movl $fastmove_tail_fault,PCB_ONFAULT(%eax)
1094 shrl $2,%ecx /* copy longword-wise */
1099 andb $3,%cl /* copy remaining bytes */
1110 addl $PCB_SAVEFPU,%edi
1113 movl $PCB_SAVE87_SIZE>>2,%ecx
1122 fastmove_tail_fault:
1130 movl $0,PCB_ONFAULT(%edx)
1133 #endif /* I586_CPU && NNPX > 0 */
1136 * fu{byte,sword,word} - MP SAFE
1138 * Fetch a byte (sword, word) from user memory
1142 movl $fusufault,PCB_ONFAULT(%ecx)
1143 movl 4(%esp),%edx /* from */
1145 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
1149 movl $0,PCB_ONFAULT(%ecx)
1153 * These two routines are called from the profiling code, potentially
1154 * at interrupt time. If they fail, that's okay, good things will
1155 * happen later. Fail all the time for now - until the trap code is
1156 * able to deal with this.
1168 movl $fusufault,PCB_ONFAULT(%ecx)
1171 cmpl $VM_MAXUSER_ADDRESS-2,%edx
1175 movl $0,PCB_ONFAULT(%ecx)
1183 movl $fusufault,PCB_ONFAULT(%ecx)
1186 cmpl $VM_MAXUSER_ADDRESS-1,%edx
1190 movl $0,PCB_ONFAULT(%ecx)
1197 movl %eax,PCB_ONFAULT(%ecx)
1202 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
1204 * Write a byte (word, longword) to user memory
1208 movl $fusufault,PCB_ONFAULT(%ecx)
1211 #if defined(I386_CPU)
1213 #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1214 cmpl $CPUCLASS_386,_cpu_class
1215 jne 2f /* we only have to set the right segment selector */
1216 #endif /* I486_CPU || I586_CPU || I686_CPU */
1218 /* XXX - page boundary crossing is still not handled */
1223 leal _PTmap(%edx),%ecx
1226 testb $PG_V,_PTmap(%ecx) /* PTE page must be valid */
1228 movb _PTmap(%edx),%dl
1229 andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */
1230 cmpb $PG_V|PG_RW|PG_U,%dl
1234 /* simulate a trap */
1237 popl %edx /* remove junk parameter from stack */
1245 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
1252 movl %eax,PCB_ONFAULT(%ecx)
1256 * susword - MP SAFE (if not I386_CPU)
1260 movl $fusufault,PCB_ONFAULT(%ecx)
1263 #if defined(I386_CPU)
1265 #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1266 cmpl $CPUCLASS_386,_cpu_class
1268 #endif /* I486_CPU || I586_CPU || I686_CPU */
1270 /* XXX - page boundary crossing is still not handled */
1275 leal _PTmap(%edx),%ecx
1278 testb $PG_V,_PTmap(%ecx) /* PTE page must be valid */
1280 movb _PTmap(%edx),%dl
1281 andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */
1282 cmpb $PG_V|PG_RW|PG_U,%dl
1286 /* simulate a trap */
1289 popl %edx /* remove junk parameter from stack */
1297 cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */
1303 movl _curpcb,%ecx /* restore trashed register */
1304 movl %eax,PCB_ONFAULT(%ecx)
1308 * su[i]byte - MP SAFE (if not I386_CPU)
1313 movl $fusufault,PCB_ONFAULT(%ecx)
1316 #if defined(I386_CPU)
1318 #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1319 cmpl $CPUCLASS_386,_cpu_class
1321 #endif /* I486_CPU || I586_CPU || I686_CPU */
1327 leal _PTmap(%edx),%ecx
1330 testb $PG_V,_PTmap(%ecx) /* PTE page must be valid */
1332 movb _PTmap(%edx),%dl
1333 andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */
1334 cmpb $PG_V|PG_RW|PG_U,%dl
1338 /* simulate a trap */
1341 popl %edx /* remove junk parameter from stack */
1349 cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */
1355 movl _curpcb,%ecx /* restore trashed register */
1356 movl %eax,PCB_ONFAULT(%ecx)
1360 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1362 * copy a string from from to to, stop when a 0 character is reached.
1363 * return ENAMETOOLONG if string is longer than maxlen, and
1364 * EFAULT on protection violations. If lencopied is non-zero,
1365 * return the actual length in *lencopied.
1371 movl $cpystrflt,PCB_ONFAULT(%ecx)
1373 movl 12(%esp),%esi /* %esi = from */
1374 movl 16(%esp),%edi /* %edi = to */
1375 movl 20(%esp),%edx /* %edx = maxlen */
1377 movl $VM_MAXUSER_ADDRESS,%eax
1379 /* make sure 'from' is within bounds */
1383 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1401 /* Success -- 0 byte reached */
1406 /* edx is zero - return ENAMETOOLONG or EFAULT */
1407 cmpl $VM_MAXUSER_ADDRESS,%esi
1410 movl $ENAMETOOLONG,%eax
1417 /* set *lencopied and return %eax */
1419 movl $0,PCB_ONFAULT(%ecx)
1433 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
1439 movl 12(%esp),%esi /* %esi = from */
1440 movl 16(%esp),%edi /* %edi = to */
1441 movl 20(%esp),%edx /* %edx = maxlen */
1452 /* Success -- 0 byte reached */
1457 /* edx is zero -- return ENAMETOOLONG */
1458 movl $ENAMETOOLONG,%eax
1461 /* set *lencopied and return %eax */
1483 cld /* compare forwards */
1502 * Handling of special 386 registers and descriptor tables etc
1504 /* void lgdt(struct region_descriptor *rdp); */
1506 /* reload the descriptor table */
1510 /* flush the prefetch q */
1514 /* reload "stale" selectors */
1525 /* reload code selector by turning return into intersegmental return */
1532 * void lidt(struct region_descriptor *rdp);
1540 * void lldt(u_short sel)
1547 * void ltr(u_short sel)
1553 /* ssdtosd(*ssdp,*sdp) */
1590 /* void load_cr3(caddr_t cr3) */
1592 #if defined(SWTCH_OPTIM_STATS)
1593 incl _tlb_flush_count
1604 /* void load_cr4(caddr_t cr4) */
1610 /* void reset_dbregs() */
1613 movl %eax,%dr7 /* disable all breakpoints first */
1621 /*****************************************************************************/
1622 /* setjump, longjump */
1623 /*****************************************************************************/
1627 movl %ebx,(%eax) /* save ebx */
1628 movl %esp,4(%eax) /* save esp */
1629 movl %ebp,8(%eax) /* save ebp */
1630 movl %esi,12(%eax) /* save esi */
1631 movl %edi,16(%eax) /* save edi */
1632 movl (%esp),%edx /* get rta */
1633 movl %edx,20(%eax) /* save eip */
1634 xorl %eax,%eax /* return(0); */
1639 movl (%eax),%ebx /* restore ebx */
1640 movl 4(%eax),%esp /* restore esp */
1641 movl 8(%eax),%ebp /* restore ebp */
1642 movl 12(%eax),%esi /* restore esi */
1643 movl 16(%eax),%edi /* restore edi */
1644 movl 20(%eax),%edx /* get rta */
1645 movl %edx,(%esp) /* put in return frame */
1646 xorl %eax,%eax /* return(1); */
1651 * Support for BB-profiling (gcc -a). The kernbb program will extract
1652 * the data from the kernel.
1662 NON_GPROF_ENTRY(__bb_init_func)
1668 .byte 0xc3 /* avoid macro for `ret' */