2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $DragonFly: src/sys/platform/pc32/i386/bcopy.s,v 1.2 2004/04/30 00:59:52 dillon Exp $
29 * bcopy(source:%esi, target:%edi, count:%ecx)
31 * note: esi, edi, eax, ecx, and edx may be destroyed
36 #include <machine/asmacros.h>
37 #include <machine/cputypes.h>
38 #include <machine/pmap.h>
39 #include <machine/specialreg.h>
46 * If memcpy/bcopy is called as part of a copyin or copyout, the
47 * on-fault routine is set up to do a 'ret'. We have to restore
48 * %ebx and return to the copyin/copyout fault handler.
52 addl $4,%esp /* skip normal return vector */
53 ret /* return to copyin/copyout fault handler */
56 * GENERIC BCOPY() - COPY DIRECTION CHECK AND FORWARDS COPY
58 * Reasonably optimal on all modern machines.
62 ENTRY(asm_generic_memcpy) /* memcpy() entry point use optimal copy */
64 pushl $generic_onfault
68 ENTRY(asm_generic_bcopy)
70 pushl $generic_onfault
71 cmpl %esi,%edi /* if (edi < esi) fwd copy ok */
74 cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */
113 * GENERIC_BCOPY() - BACKWARDS COPY
157 * MMX BCOPY() - COPY DIRECTION CHECK AND FORWARDS COPY
159 * Reasonably optimal on all modern machines with MMX or SSE2.
160 * XXX But very messy, we need a better way to use fp in the kernel.
162 * note: esi, edi, eax, ecx, and edx may be destroyed
164 * In order for the kernel to be able to use the FPU:
166 * (1) The kernel may not already be using the fpu
168 * (2) If the fpu is owned by the application, we must save
169 * its state. If the fpu is not owned by the application
170 * the application's saved fp state may already exist
173 * (3) We cannot allow the kernel to overwrite the application's
174 * FPU state with our own, so we allocate space on the
175 * stack and create a new TD_SAVEFPU, saving the old
178 * (4) While we are using the FP unit, an interrupt may come
179 * along and preempt us, causing our FP state to be saved.
180 * We will fault/restore upon resumption. Our FP state
181 * will be saved on the stack.
183 * (5) To clean up we throw away our FP state and zero out
184 * npxthread to indicate that the application's FP state
185 * is stored in TD_SAVEFPU, and we then restore the original
188 * We do not attempt to restore the application's FP state.
189 * We set the TS bit to guarantee that the application will
190 * fault when it next tries to access the FP (to restore its
193 * NOTE: fxsave requires a 16-byte aligned address
195 * MMX+XMM (SSE2): Typical on Athlons, later P4s. 128 bit media insn.
196 * MMX: Typical on XPs and P3s. 64 bit media insn.
199 #define MMX_SAVE_BLOCK(missfunc) \
202 btsl $1,PCPU(kernel_fpu_lock) ; \
207 movl PCPU(curthread),%edx ; \
208 movl TD_SAVEFPU(%edx),%ebx ; \
210 andl $0xfffffff0,%esp ; \
211 movl %esp,TD_SAVEFPU(%edx) ; \
212 cmpl %edx,PCPU(npxthread) ; \
217 movl %edx,PCPU(npxthread) ; \
222 #define MMX_RESTORE_BLOCK \
226 #define MMX_RESTORE_BLOCK2 \
227 movl PCPU(curthread),%edx ; \
228 movl $0,PCPU(npxthread) ; \
229 movl %ebx,TD_SAVEFPU(%edx) ; \
236 movl $0,PCPU(kernel_fpu_lock)
239 * xmm/mmx_onfault routine. Restore the fpu state, skip the normal
240 * return vector, and return to the caller's on-fault routine
241 * (which was pushed on the caller's stack just before it called us)
249 * MMX entry points - only support 64 bit media instructions
252 ENTRY(asm_mmx_memcpy) /* memcpy() entry point use optimal copy */
253 MMX_SAVE_BLOCK(asm_generic_memcpy)
258 MMX_SAVE_BLOCK(asm_generic_bcopy)
259 cmpl %esi,%edi /* if (edi < esi) fwd copy ok */
262 cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */
268 * XMM entry points - support 128 bit media instructions
271 ENTRY(asm_xmm_memcpy) /* memcpy() entry point use optimal copy */
272 MMX_SAVE_BLOCK(asm_generic_memcpy)
277 MMX_SAVE_BLOCK(asm_generic_bcopy)
278 cmpl %esi,%edi /* if (edi < esi) fwd copy ok */
281 cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */
285 movl %esi,%eax /* skip xmm if the data is not aligned */
296 movdqa 16(%esi),%xmm1
297 movdqa 32(%esi),%xmm2
298 movdqa 48(%esi),%xmm3
299 movdqa 64(%esi),%xmm4
300 movdqa 80(%esi),%xmm5
301 movdqa 96(%esi),%xmm6
302 movdqa 112(%esi),%xmm7
303 /*prefetchnta 128(%esi) 3dNOW */
307 * movdqa or movntdq can be used.
310 movdqa %xmm1,16(%edi)
311 movdqa %xmm2,32(%edi)
312 movdqa %xmm3,48(%edi)
313 movdqa %xmm4,64(%edi)
314 movdqa %xmm5,80(%edi)
315 movdqa %xmm6,96(%edi)
316 movdqa %xmm7,112(%edi)
334 /*prefetchnta 128(%esi) 3dNOW */
358 * GENERIC_BCOPY() - BACKWARDS COPY
360 * Don't bother using xmm optimizations, just stick with mmx.
377 /*prefetchnta -128(%esi)*/