| Commit | Line | Data |
|---|---|---|
| 263541db | 1 | /* |
| 8c10bfcf MD |
2 | * Copyright (c) 2004 The DragonFly Project. All rights reserved. |
| 3 | * | |
| 4 | * This code is derived from software contributed to The DragonFly Project | |
| 5 | * by Matthew Dillon <dillon@backplane.com> | |
| 6 | * | |
| 263541db MD |
7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| 8c10bfcf | 10 | * |
| 263541db MD |
11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 8c10bfcf MD |
14 | * notice, this list of conditions and the following disclaimer in |
| 15 | * the documentation and/or other materials provided with the | |
| 16 | * distribution. | |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 18 | * contributors may be used to endorse or promote products derived | |
| 19 | * from this software without specific, prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 263541db | 32 | * SUCH DAMAGE. |
| 263541db MD |
33 | */ |
| 34 | /* | |
| 35 | * bcopy(source:%esi, target:%edi, count:%ecx) | |
| 36 | * | |
| 37 | * note: esi, edi, eax, ecx, and edx may be destroyed | |
| 38 | */ | |
| 39 | ||
| 263541db MD |
40 | #include <machine/asmacros.h> |
| 41 | #include <machine/cputypes.h> | |
| 42 | #include <machine/pmap.h> | |
| 43 | #include <machine/specialreg.h> | |
| 44 | ||
| 45 | #include "assym.s" | |
| 46 | ||
| 47 | .text | |
| 48 | ||
| 49 | /* | |
| af5ac7fb MD |
50 | * bcopyb() is a 'dumb' byte-granular bcopy. It is only used by |
| 51 | * devices which need to bcopy device-mapped memory which cannot | |
| 52 | * otherwise handle 16 or 32 bit ops. | |
| 53 | */ | |
| 54 | ALIGN_TEXT | |
| 55 | ENTRY(bcopyb) /* byte-granular copy; three stack args: source, dest, count */ | |
| 56 | pushl %esi /* %esi and %edi are overwritten below; restored before ret */ | |
| 57 | pushl %edi | |
| 58 | movl 12(%esp),%esi /* 1st arg (past 2 pushes + return address): source */ | |
| 59 | movl 16(%esp),%edi /* 2nd arg: destination */ | |
| 60 | movl 20(%esp),%ecx /* 3rd arg: byte count */ | |
| 61 | movl %edi,%eax | |
| 62 | subl %esi,%eax /* eax = dst - src */ | |
| 63 | cmpl %ecx,%eax /* overlapping && src < dst? */ | |
| 64 | jb 1f /* unsigned (dst - src) < count: must copy backwards */ | |
| 65 | cld /* nope, copy forwards */ | |
| 66 | rep | |
| 67 | movsb | |
| 68 | popl %edi | |
| 69 | popl %esi | |
| 70 | ret | |
| 71 | ||
| 72 | ALIGN_TEXT | |
| 73 | 1: | |
| 74 | addl %ecx,%edi /* copy backwards. */ | |
| 75 | addl %ecx,%esi | |
| 76 | decl %edi /* point at the last byte of each buffer */ | |
| 77 | decl %esi | |
| 78 | std /* direction flag set: movsb walks downwards */ | |
| 79 | rep | |
| 80 | movsb | |
| 81 | popl %edi | |
| 82 | popl %esi | |
| 83 | cld /* leave the direction flag clear on return */ | |
| 84 | ret | |
| 85 | ||
| e14fb90b MD |
86 | /* |
| 87 | * bcopyi(s, d, len) (NON OVERLAPPING) | |
| 88 | * | |
| 89 | * This is a dumb 32-bit-granular bcopy | |
| 90 | */ | |
| 91 | ALIGN_TEXT | |
| 92 | ENTRY(bcopyi) /* forward-only copy in 32-bit units; no overlap handling */ | |
| 93 | pushl %esi /* %esi and %edi are overwritten below; restored before ret */ | |
| 94 | pushl %edi | |
| 95 | movl 12(%esp),%esi /* 1st arg (past 2 pushes + return address): s */ | |
| 96 | movl 16(%esp),%edi /* 2nd arg: d */ | |
| 97 | movl 20(%esp),%ecx /* 3rd arg: len in bytes */ | |
| 98 | shrl $2,%ecx /* len / 4 longwords; a remainder of 1-3 bytes is not copied */ | |
| 99 | cld /* copy forwards */ | |
| 100 | rep | |
| 101 | movsl | |
| 102 | popl %edi | |
| 103 | popl %esi | |
| 104 | ret | |
| af5ac7fb MD |
105 | |
| 106 | /* | |
| 263541db | 107 | * If memcpy/bcopy is called as part of a copyin or copyout, the |
| af5ac7fb | 108 | * on-fault routine is set up to do a 'ret'. We have to restore |
| 263541db MD |
109 | * %ebx and return to the copyin/copyout fault handler. |
| 110 | */ | |
| 111 | generic_onfault: /* fault exit for the asm_generic_* frame (pushl %ebx, pushl $generic_onfault) */ | |
| 112 | popl %ebx /* undo the entry point's pushl %ebx */ | |
| 113 | addl $4,%esp /* skip normal return vector */ | |
| 114 | ret /* return to copyin/copyout fault handler */ | |
| 115 | ||
| 116 | /* | |
| 117 | * GENERIC BCOPY() - COPY DIRECTION CHECK AND FORWARDS COPY | |
| 118 | * | |
| 93ad6da2 | 119 | * Reasonably optimal on all modern machines. |
| 263541db MD |
120 | */ |
| 121 | ||
| 122 | SUPERALIGN_TEXT | |
| 123 | ENTRY(asm_generic_memcpy) /* memcpy() entry point use optimal copy */ | |
| 93ad6da2 MD |
124 | pushl %ebx /* WARNING COPYIN/OUT EXPECTS THIS FRAME */ |
| 125 | pushl $generic_onfault/* WARNING COPYIN/OUT EXPECTS THIS FRAME */ | |
| 263541db MD |
126 | jmp 2f /* no overlap check for memcpy: always copy forwards */ |
| 127 | ||
| 128 | SUPERALIGN_TEXT | |
| 129 | ENTRY(asm_generic_bcopy) /* in: %esi = source, %edi = target, %ecx = count */ | |
| 93ad6da2 MD |
130 | pushl %ebx /* WARNING COPYIN/OUT EXPECTS THIS FRAME */ |
| 131 | pushl $generic_onfault/* WARNING COPYIN/OUT EXPECTS THIS FRAME */ | |
| 263541db MD |
132 | cmpl %esi,%edi /* if (edi < esi) fwd copy ok */ |
| 133 | jb 2f | |
| 134 | addl %ecx,%esi /* esi = source + count, kept if we branch to 10f */ | |
| 135 | cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */ | |
| 136 | jb 10f | |
| 137 | subl %ecx,%esi /* no overlap: undo the add and copy forwards */ | |
| 138 | jmp 2f | |
| 139 | ||
| 140 | SUPERALIGN_TEXT | |
| 141 | 1: /* forward loop: 32 bytes per pass via eax/ebx/edx, loads and stores interleaved */ | |
| 142 | movl (%esi),%eax | |
| 143 | movl 4(%esi),%ebx | |
| 144 | movl 8(%esi),%edx | |
| 145 | movl %eax,(%edi) | |
| 146 | movl 12(%esi),%eax | |
| 147 | movl %ebx,4(%edi) | |
| 148 | movl 16(%esi),%ebx | |
| 149 | movl %edx,8(%edi) | |
| 150 | movl 20(%esi),%edx | |
| 151 | movl %eax,12(%edi) | |
| 152 | movl 24(%esi),%eax | |
| 153 | movl %ebx,16(%edi) | |
| 154 | movl 28(%esi),%ebx | |
| 155 | movl %edx,20(%edi) | |
| 156 | movl %eax,24(%edi) | |
| 157 | addl $32,%esi | |
| 158 | movl %ebx,28(%edi) | |
| 159 | addl $32,%edi | |
| 160 | 2: /* loop test, also the entry point into the forward copy */ | |
| 161 | subl $32,%ecx | |
| 162 | jae 1b /* no borrow: at least 32 bytes were left, copy a block */ | |
| 163 | addl $32,%ecx /* undo the last subtract: ecx = 0..31 residual bytes */ | |
| 164 | jz 3f | |
| 165 | cld /* copy the residual bytes forwards */ | |
| 166 | rep | |
| 167 | movsb | |
| 168 | 3: | |
| 169 | addl $4,%esp /* discard the pushed generic_onfault vector */ | |
| 170 | popl %ebx | |
| 171 | ret | |
| 172 | ||
| 173 | /* | |
| 174 | * GENERIC_BCOPY() - BACKWARDS COPY | |
| 175 | */ | |
| 176 | SUPERALIGN_TEXT | |
| 177 | 10: /* reached from asm_generic_bcopy with %esi already advanced by count */ | |
| 178 | addl %ecx,%edi /* advance %edi too: both now point one past the end */ | |
| 179 | jmp 12f | |
| 180 | ||
| 181 | SUPERALIGN_TEXT | |
| 182 | 11: /* backward loop: 32 bytes per pass, highest addresses first */ | |
| 183 | movl -4(%esi),%eax | |
| 184 | movl -8(%esi),%ebx | |
| 185 | movl -12(%esi),%edx | |
| 186 | movl %eax,-4(%edi) | |
| 187 | movl -16(%esi),%eax | |
| 188 | movl %ebx,-8(%edi) | |
| 189 | movl -20(%esi),%ebx | |
| 190 | movl %edx,-12(%edi) | |
| 191 | movl -24(%esi),%edx | |
| 192 | movl %eax,-16(%edi) | |
| 193 | movl -28(%esi),%eax | |
| 194 | movl %ebx,-20(%edi) | |
| 195 | movl -32(%esi),%ebx | |
| 196 | movl %edx,-24(%edi) | |
| 197 | movl %eax,-28(%edi) | |
| 198 | subl $32,%esi | |
| 199 | movl %ebx,-32(%edi) | |
| 200 | subl $32,%edi | |
| 201 | 12: /* loop test, also the entry point into the backward copy */ | |
| 202 | subl $32,%ecx | |
| 203 | jae 11b /* no borrow: at least 32 bytes were left, copy a block */ | |
| 204 | addl $32,%ecx /* undo the last subtract: ecx = 0..31 residual bytes */ | |
| 205 | jz 13f | |
| 206 | decl %esi /* step back from one-past-the-end to the last byte */ | |
| 207 | decl %edi | |
| 208 | std /* direction flag set: movsb walks downwards */ | |
| 209 | rep | |
| 210 | movsb | |
| 211 | cld /* leave the direction flag clear */ | |
| 212 | 13: | |
| 213 | addl $4,%esp /* discard the pushed generic_onfault vector */ | |
| 214 | popl %ebx | |
| 215 | ret | |
| 216 | ||
| 217 | /* | |
| 218 | * MMX BCOPY() - COPY DIRECTION CHECK AND FORWARDS COPY | |
| 219 | * | |
| a02705a9 | 220 | * note: esi, edi, eax, ecx, and edx are allowed to be destroyed. |
| 263541db MD |
221 | * |
| 222 | * In order for the kernel to be able to use the FPU: | |
| 223 | * | |
| a02705a9 | 224 | * (1) The kernel may not already be using the fpu. |
| 65d6ce10 | 225 | * |
| 263541db | 226 | * (2) If the fpu is owned by the application, we must save |
| 65d6ce10 MD |
227 | * its state. If the fpu is not owned by the application |
| 228 | * the application's saved fp state may already exist | |
| 229 | * in TD_SAVEFPU. | |
| 263541db | 230 | * |
| a02705a9 MD |
231 | * (3) We cannot allow the kernel to overwrite the application's |
| 232 | * FPU state with our own, so we make sure the application's | |
| 233 | * FPU state has been saved and then point TD_SAVEFPU at a | |
| 234 | * temporary fpu save area in the globaldata structure. | |
| 65d6ce10 | 235 | * |
| a02705a9 MD |
236 | * RACES/ALGORITHM: |
| 237 | * | |
| 238 | * If gd_npxthread is not NULL we must save the application's | |
| 239 | * current FP state to the current save area and then NULL | |
| 240 | * out gd_npxthread to interlock against new interruptions | |
| 241 | * changing the FP state further. | |
| 263541db | 242 | * |
| a02705a9 MD |
243 | * If gd_npxthread is NULL the FP unit is in a known 'safe' |
| 244 | * state and may be used once the new save area is installed. | |
| 263541db | 245 | * |
| a02705a9 MD |
246 | * race(1): If an interrupt occurs just prior to calling fxsave |
| 247 | * all that happens is that fxsave gets a npxdna trap, restores | |
| 248 | * the app's environment, and immediately traps, restores, | |
| 249 | * and saves it again. | |
| 263541db | 250 | * |
| a02705a9 | 251 | * race(2): No interrupt can safely occur after we NULL-out |
| e687cf27 | 252 | * npxthread until we fnclex, because the kernel assumes that |
| a02705a9 MD |
253 | * the FP unit is in a safe state when npxthread is NULL. It's |
| 254 | * more convenient to use a cli sequence here (it is not | |
| 255 | * considered to be in the critical path), but a critical | |
| 256 | * section would also work. | |
| 263541db | 257 | * |
| e687cf27 MD |
258 | * NOTE ON FNINIT vs FNCLEX - Making the FP unit safe here is |
| 259 | * the goal. It should be sufficient to just call FNCLEX rather | |
| 260 | * than having to FNINIT the entire unit. | |
| 261 | * | |
| a02705a9 MD |
262 | * race(3): The FP unit is in a known state (because npxthread |
| 263 | * was either previously NULL or we saved and init'd and made | |
| 264 | * it NULL). This is true even if we are preempted and the | |
| 265 | * preempting thread uses the FP unit, because it will be | |
| 266 | * fninit'd again on return. ANY STATE WE SAVE TO THE FPU MAY | |
| 267 | * BE DESTROYED BY PREEMPTION WHILE NPXTHREAD IS NULL! However, | |
| 268 | * an interrupt occurring in between clts and the setting of | |
| 269 | * gd_npxthread may set the TS bit again and cause the next | |
| 270 | * npxdna() to panic when it sees a non-NULL gd_npxthread. | |
| 271 | * | |
| 272 | * We can safely set TD_SAVEFPU to point to a new uninitialized | |
| 273 | * save area and then set GD_NPXTHREAD to non-NULL. If an | |
| 274 | * interrupt occurs after we set GD_NPXTHREAD, all that happens | |
| 275 | * is that the safe FP state gets saved and restored. We do not | |
| e687cf27 | 276 | * need to clex again. |
| 330938c0 | 277 | * |
| a02705a9 MD |
278 | * We can safely clts after setting up the new save-area, before |
| 279 | * installing gd_npxthread, even if we get preempted just after | |
| 280 | * calling clts. This is because the FP unit will be in a safe | |
| 281 | * state while gd_npxthread is NULL. Setting gd_npxthread will | |
| 282 | * simply lock-in that safe-state. Calling clts saves | |
| 283 | * unnecessary trap overhead since we are about to use the FP | |
| 284 | * unit anyway and don't need to 'restore' any state prior to | |
| 285 | * that first use. | |
| 330938c0 | 286 | * |
| 263541db MD |
287 | * MMX+XMM (SSE2): Typical on Athlons, later P4s. 128 bit media insn. |
| 288 | * MMX: Typical on XPs and P3s. 64 bit media insn. | |
| 93ad6da2 MD |
289 | * |
| 290 | * WARNING! copyin/copyout expects a push %ebx/onfault frame ONLY, | |
| 291 | * we can't mess with the stack any more than that. | |
| 263541db MD |
292 | */ |
| 293 | ||
| a02705a9 MD |
294 | #define MMX_SAVE_BLOCK(missfunc) \ |
| 295 | cmpl $2048,%ecx ; /* counts below 2048 go to missfunc */ \ | |
| 296 | jb missfunc ; \ | |
| 297 | movl MYCPU,%eax ; /* EAX = MYCPU */ \ | |
| 298 | btsl $1,GD_FPU_LOCK(%eax) ; /* test-and-set bit 1 of the lock */ \ | |
| 299 | jc missfunc ; /* bit already set: use missfunc */ \ | |
| 300 | pushl %ebx ; /* %ebx is reused for save area ptr */ \ | |
| 301 | pushl %ecx ; /* count; slot becomes onfault vector */ \ | |
| 302 | movl GD_CURTHREAD(%eax),%edx ; /* EDX = CURTHREAD */ \ | |
| 303 | movl TD_SAVEFPU(%edx),%ebx ; /* save app save area */\ | |
| f9235b6d | 304 | incl TD_CRITCOUNT(%edx) ; /* pairs with the decl below */ \ |
| a02705a9 MD |
305 | cmpl $0,GD_NPXTHREAD(%eax) ; \ |
| 306 | je 100f ; /* npxthread NULL: nothing to save */ \ | |
| 307 | fxsave 0(%ebx) ; /* race(1) */ \ | |
| 308 | movl $0,GD_NPXTHREAD(%eax) ; /* interlock intr */ \ | |
| 309 | clts ; \ | |
| e687cf27 | 310 | fnclex ; /* race(2) */ \ |
| a02705a9 MD |
311 | 100: ; \ |
| 312 | leal GD_SAVEFPU(%eax),%ecx ; /* ECX = &globaldata save area */ \ | |
| 313 | movl %ecx,TD_SAVEFPU(%edx) ; /* install as thread's save area */ \ | |
| aad81e48 | 314 | orl $TDF_KERNELFP,TD_FLAGS(%edx) ; \ |
| a02705a9 MD |
315 | clts ; \ |
| 316 | movl %edx,GD_NPXTHREAD(%eax) ; /* race(3) */ \ | |
| f9235b6d | 317 | decl TD_CRITCOUNT(%edx) ; /* crit_exit() */ \ |
| a02705a9 MD |
318 | cmpl $0,GD_REQFLAGS(%eax) ; \ |
| 319 | je 101f ; /* no request flags set */ \ | |
| f9235b6d MD |
320 | testl $-1,TD_CRITCOUNT(%edx) ; \ |
| 321 | jne 101f ; /* crit count still non-zero */ \ | |
| faaeffac | 322 | call splz_check ; \ |
| a02705a9 MD |
323 | /* note: eax,ecx,edx destroyed */ \ |
| 324 | 101: ; \ | |
| 325 | movl (%esp),%ecx ; /* reload count from pushl %ecx */ \ | |
| 326 | movl $mmx_onfault,(%esp) ; /* slot now holds onfault vector */ \ | |
| 263541db | 327 | |
| 330938c0 | 328 | /* |
| a02705a9 MD |
329 | * When restoring the application's FP state we must first clear |
| 330 | * npxthread to prevent further saves, then restore the pointer | |
| 331 | * to the app's save area. We do not have to (and should not) | |
| 332 | * restore the app's FP state now. Note that we do not have to | |
| e687cf27 | 333 | * call fnclex because our use of the FP guarantees that it is in |
| a02705a9 | 334 | * a 'safe' state (at least for kernel use). |
| 330938c0 | 335 | * |
| a02705a9 MD |
336 | * NOTE: it is not usually safe to mess with CR0 outside of a |
| 337 | * critical section, because TS may get set by a preemptive | |
| 338 | * interrupt. However, we *can* race a load/set-ts/store against | |
| 339 | * an interrupt doing the same thing. | |
| 431d0fef MD |
340 | * |
| 341 | * WARNING! A Virtual kernel depends on CR0_TS remaining set after | |
| 342 | * we use the FP unit if it asked it to be set. | |
| 330938c0 | 343 | */ |
| 263541db MD |
344 | |
| 345 | #define MMX_RESTORE_BLOCK \ | |
| 346 | addl $4,%esp ; /* discard mmx_onfault vector */ \ | |
| 347 | MMX_RESTORE_BLOCK2 | |
| 348 | ||
| 349 | #define MMX_RESTORE_BLOCK2 \ | |
| a02705a9 MD |
350 | movl MYCPU,%ecx ; /* ECX = MYCPU */ \ |
| 351 | movl GD_CURTHREAD(%ecx),%edx ; /* EDX = CURTHREAD */ \ | |
| 352 | movl $0,GD_NPXTHREAD(%ecx) ; /* clear npxthread first */ \ | |
| aad81e48 | 353 | andl $~TDF_KERNELFP,TD_FLAGS(%edx) ; \ |
| 65d6ce10 MD |
354 | movl %ebx,TD_SAVEFPU(%edx) ; /* app save area ptr kept in %ebx */ \ |
| 355 | smsw %ax ; /* read machine status word */ \ | |
| a02705a9 | 356 | popl %ebx ; /* undo MMX_SAVE_BLOCK's pushl %ebx */ \ |
| 65d6ce10 | 357 | orb $CR0_TS,%al ; /* set TS */ \ |
| 263541db | 358 | lmsw %ax ; \ |
| a02705a9 | 359 | movl $0,GD_FPU_LOCK(%ecx) /* clear the lock word set by btsl in MMX_SAVE_BLOCK */ |
| 263541db MD |
360 | |
| 361 | /* | |
| 362 | * xmm/mmx_onfault routine. Restore the fpu state, skip the normal | |
| 363 | * return vector, and return to the caller's on-fault routine | |
| a02705a9 | 364 | * (which was pushed on the caller's stack just before it called us) |
| 263541db | 365 | */ |
| a02705a9 | 366 | ALIGN_TEXT |
| 263541db MD |
367 | mmx_onfault: /* fault exit for frames built by MMX_SAVE_BLOCK */ |
| 368 | MMX_RESTORE_BLOCK2 /* BLOCK2 form: no onfault vector left to discard here; pops saved %ebx */ | |
| 369 | addl $4,%esp /* skip normal return vector */ | |
| 370 | ret /* return to the caller's on-fault routine */ | |
| 371 | ||
| 372 | /* | |
| 373 | * MMX entry points - only support 64 bit media instructions | |
| 374 | */ | |
| 375 | SUPERALIGN_TEXT | |
| 376 | ENTRY(asm_mmx_memcpy) /* memcpy() entry point use optimal copy */ | |
| 377 | MMX_SAVE_BLOCK(asm_generic_memcpy) /* may branch to asm_generic_memcpy instead */ | |
| 378 | jmp 5f /* no overlap check: go to the 64-bit forward copy */ | |
| 379 | ||
| 380 | SUPERALIGN_TEXT | |
| 381 | ENTRY(asm_mmx_bcopy) /* in: %esi = source, %edi = target, %ecx = count */ | |
| 382 | MMX_SAVE_BLOCK(asm_generic_bcopy) /* may branch to asm_generic_bcopy instead */ | |
| 383 | cmpl %esi,%edi /* if (edi < esi) fwd copy ok */ | |
| 384 | jb 5f | |
| 385 | addl %ecx,%esi /* esi = source + count, kept if we branch to 10f */ | |
| 386 | cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */ | |
| 387 | jb 10f | |
| 388 | subl %ecx,%esi /* no overlap: undo the add and copy forwards */ | |
| 389 | jmp 5f | |
| 390 | ||
| 391 | /* | |
| 392 | * XMM entry points - support 128 bit media instructions | |
| 393 | */ | |
| 394 | SUPERALIGN_TEXT | |
| 395 | ENTRY(asm_xmm_memcpy) /* memcpy() entry point use optimal copy */ | |
| 396 | MMX_SAVE_BLOCK(asm_generic_memcpy) /* may branch to asm_generic_memcpy instead */ | |
| 397 | jmp 1f /* no overlap check: go to the alignment test */ | |
| 398 | ||
| 399 | SUPERALIGN_TEXT | |
| 400 | ENTRY(asm_xmm_bcopy) /* in: %esi = source, %edi = target, %ecx = count */ | |
| 401 | MMX_SAVE_BLOCK(asm_generic_bcopy) /* may branch to asm_generic_bcopy instead */ | |
| 402 | cmpl %esi,%edi /* if (edi < esi) fwd copy ok */ | |
| 403 | jb 1f | |
| 404 | addl %ecx,%esi /* esi = source + count, kept if we branch to 10f */ | |
| 405 | cmpl %esi,%edi /* if (edi < esi + count) do bkwrds copy */ | |
| 406 | jb 10f | |
| 407 | subl %ecx,%esi /* no overlap: undo the add and copy forwards */ | |
| 408 | 1: | |
| 409 | movl %esi,%eax /* skip xmm if the data is not aligned */ | |
| 410 | andl $15,%eax /* low 4 bits of source: non-zero means not 16-byte aligned */ | |
| 411 | jnz 5f | |
| 412 | movl %edi,%eax | |
| 413 | andl $15,%eax /* same test for the target */ | |
| 414 | jz 3f /* both aligned: use the 128-bit loop */ | |
| 415 | jmp 5f /* otherwise use the 64-bit loop */ | |
| 416 | ||
| 417 | SUPERALIGN_TEXT | |
| 418 | 2: /* xmm forward loop: 128 bytes per pass through %xmm0-%xmm7 */ | |
| 419 | movdqa (%esi),%xmm0 | |
| 420 | movdqa 16(%esi),%xmm1 | |
| 421 | movdqa 32(%esi),%xmm2 | |
| 422 | movdqa 48(%esi),%xmm3 | |
| 423 | movdqa 64(%esi),%xmm4 | |
| 424 | movdqa 80(%esi),%xmm5 | |
| 425 | movdqa 96(%esi),%xmm6 | |
| 426 | movdqa 112(%esi),%xmm7 | |
| 427 | /*prefetchnta 128(%esi) 3dNOW */ | |
| 428 | addl $128,%esi | |
| 429 | ||
| 430 | /* | |
| 431 | * movdqa or movntdq can be used. | |
| 432 | */ | |
| 433 | movdqa %xmm0,(%edi) | |
| 434 | movdqa %xmm1,16(%edi) | |
| 435 | movdqa %xmm2,32(%edi) | |
| 436 | movdqa %xmm3,48(%edi) | |
| 437 | movdqa %xmm4,64(%edi) | |
| 438 | movdqa %xmm5,80(%edi) | |
| 439 | movdqa %xmm6,96(%edi) | |
| 440 | movdqa %xmm7,112(%edi) | |
| 441 | addl $128,%edi | |
| 442 | 3: /* xmm loop test and entry point */ | |
| 443 | subl $128,%ecx | |
| 444 | jae 2b /* no borrow: at least 128 bytes were left, copy a block */ | |
| 445 | addl $128,%ecx /* undo the last subtract: ecx = 0..127 residual bytes */ | |
| 446 | jz 6f | |
| 447 | jmp 5f /* finish the residual with the 64-bit loop and byte tail */ | |
| 448 | SUPERALIGN_TEXT | |
| 449 | 4: /* mmx forward loop: 64 bytes per pass through %mm0-%mm7 */ | |
| 450 | movq (%esi),%mm0 | |
| 451 | movq 8(%esi),%mm1 | |
| 452 | movq 16(%esi),%mm2 | |
| 453 | movq 24(%esi),%mm3 | |
| 454 | movq 32(%esi),%mm4 | |
| 455 | movq 40(%esi),%mm5 | |
| 456 | movq 48(%esi),%mm6 | |
| 457 | movq 56(%esi),%mm7 | |
| 458 | /*prefetchnta 128(%esi) 3dNOW */ | |
| 459 | addl $64,%esi | |
| 460 | movq %mm0,(%edi) | |
| 461 | movq %mm1,8(%edi) | |
| 462 | movq %mm2,16(%edi) | |
| 463 | movq %mm3,24(%edi) | |
| 464 | movq %mm4,32(%edi) | |
| 465 | movq %mm5,40(%edi) | |
| 466 | movq %mm6,48(%edi) | |
| 467 | movq %mm7,56(%edi) | |
| 468 | addl $64,%edi | |
| 469 | 5: /* mmx loop test and entry point */ | |
| 470 | subl $64,%ecx | |
| 471 | jae 4b /* no borrow: at least 64 bytes were left, copy a block */ | |
| 472 | addl $64,%ecx /* undo the last subtract: ecx = 0..63 residual bytes */ | |
| 473 | jz 6f | |
| 474 | cld /* copy the residual bytes forwards */ | |
| 475 | rep | |
| 476 | movsb | |
| 477 | 6: | |
| 478 | MMX_RESTORE_BLOCK /* drops the onfault vector, undoes MMX_SAVE_BLOCK, pops %ebx */ | |
| 479 | ret | |
| 480 | ||
| 481 | /* | |
| 482 | * MMX BCOPY() - BACKWARDS COPY | |
| 483 | * | |
| 484 | * Don't bother using xmm optimizations, just stick with mmx. | |
| 485 | */ | |
| 486 | SUPERALIGN_TEXT | |
| 487 | 10: /* reached from asm_mmx_bcopy/asm_xmm_bcopy with %esi already advanced by count */ | |
| 488 | addl %ecx,%edi /* advance %edi too: both now point one past the end */ | |
| 489 | jmp 12f | |
| 490 | ||
| 491 | SUPERALIGN_TEXT | |
| 492 | 11: /* mmx backward loop: 64 bytes per pass through %mm0-%mm7 */ | |
| 493 | movq -64(%esi),%mm0 | |
| 494 | movq -56(%esi),%mm1 | |
| 495 | movq -48(%esi),%mm2 | |
| 496 | movq -40(%esi),%mm3 | |
| 497 | movq -32(%esi),%mm4 | |
| 498 | movq -24(%esi),%mm5 | |
| 499 | movq -16(%esi),%mm6 | |
| 500 | movq -8(%esi),%mm7 | |
| 501 | /*prefetchnta -128(%esi)*/ | |
| 502 | subl $64,%esi | |
| 503 | movq %mm0,-64(%edi) | |
| 504 | movq %mm1,-56(%edi) | |
| 505 | movq %mm2,-48(%edi) | |
| 506 | movq %mm3,-40(%edi) | |
| 507 | movq %mm4,-32(%edi) | |
| 508 | movq %mm5,-24(%edi) | |
| 509 | movq %mm6,-16(%edi) | |
| 510 | movq %mm7,-8(%edi) | |
| 511 | subl $64,%edi | |
| 512 | 12: /* loop test, also the entry point into the backward copy */ | |
| 513 | subl $64,%ecx | |
| 514 | jae 11b /* no borrow: at least 64 bytes were left, copy a block */ | |
| 515 | addl $64,%ecx /* undo the last subtract: ecx = 0..63 residual bytes */ | |
| 516 | jz 13f | |
| 517 | decl %esi /* step back from one-past-the-end to the last byte */ | |
| 518 | decl %edi | |
| 519 | std /* direction flag set: movsb walks downwards */ | |
| 520 | rep | |
| 521 | movsb | |
| 522 | cld /* leave the direction flag clear */ | |
| 523 | 13: | |
| 524 | MMX_RESTORE_BLOCK /* drops the onfault vector, undoes MMX_SAVE_BLOCK, pops %ebx */ | |
| 525 | ret | |
| 526 |