diff options
Diffstat (limited to 'EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm')
-rw-r--r-- | EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm new file mode 100644 index 0000000..d5000d0 --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm @@ -0,0 +1,169 @@

        TITLE   EfiCopyMemSSE2.asm: Optimized memory-copy routine

;------------------------------------------------------------------------------
;
; Copyright (c) 2004, Intel Corporation
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution.  The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   EfiCopyMemSSE2.asm
;
; Abstract:
;
;   This is the code that supports IA32-optimized CopyMem service
;
;   Syntax: MASM (Intel operand order).  Target: 32-bit x86 with SSE2.
;
;------------------------------------------------------------------------------

; PROC:PRIVATE
        .686P
        .XMM
        .MODEL  SMALL
        .CODE

EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD

BLOCK_SHIFT     EQU     6                       ; log2 (BLOCK_SIZE)
BLOCK_SIZE      EQU     64                      ; bytes moved per SSE2 loop iteration
DWORD_SIZE      EQU     4
DWORD_MASK      EQU     3                       ; low address bits that must be 0 for DWORD alignment

;------------------------------------------------------------------------------
;  VOID
;  EfiCommonLibCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    )
;
;  Copies Count bytes from Source to Destination.  The buffers may overlap:
;  when Destination lies inside [Source, Source + Count) the copy runs
;  backwards, otherwise it runs forwards.
;
;  ABI:    C calling convention, arguments on the stack (MASM PROC C frame).
;  Roles:  esi = source cursor, edi = destination cursor,
;          ecx = bytes still to copy, eax = scratch / 64-byte block counter.
;  Saves:  esi and edi (pushed/popped here).
;  Clobb:  eax, ecx, xmm0-xmm3, flags.
;  Notes:  Uses unaligned SSE2 moves (movdqu), so neither buffer needs any
;          particular alignment.
;------------------------------------------------------------------------------

EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD

        push    esi
        push    edi
        mov     ecx, Count
        mov     esi, Source
        mov     edi, Destination

        ;
        ; Pick the copy direction.  Addresses are unsigned, so the distance
        ; (Destination - Source) is taken modulo 2^32 and compared with an
        ; unsigned branch.  This stays correct for buffers located at or above
        ; 0x80000000, where signed comparisons would misorder the pointers.
        ;
        ;   distance == 0            => same buffer, nothing to do
        ;   0 < distance < Count     => Destination starts inside Source:
        ;                               a forward copy would overwrite unread
        ;                               source bytes, so copy backwards
        ;   otherwise                => forward copy is safe
        ;
        mov     eax, edi
        sub     eax, esi                        ; eax = Destination - Source
        jz      _CopyMemDone
        cmp     eax, ecx
        jb      _CopyOverlapped

        ; Copy single bytes until the destination pointer is 4-byte aligned
_StartByteCopy:
        test    ecx, ecx
        jz      _CopyMemDone                    ; Count exhausted, all done
        test    edi, DWORD_MASK
        jz      _CopyBlocks                     ; destination is aligned

        mov     al, BYTE PTR [esi]
        mov     BYTE PTR [edi], al
        inc     esi
        inc     edi
        dec     ecx
        jmp     _StartByteCopy

_CopyBlocks:
        ; Split the remaining count: eax = whole 64-byte blocks, ecx = tail bytes
        mov     eax, ecx
        and     ecx, BLOCK_SIZE - 1
        shr     eax, BLOCK_SHIFT                ; sets ZF when there are no whole blocks
        jz      _CopyRemainingDWords

_CopyXmmBlocks:
        ; Load the whole block before storing it.  On this path Destination is
        ; either disjoint from Source or below it, so the stores never reach
        ; source bytes that have not been read yet.
        movdqu  xmm0, OWORD PTR [esi]
        movdqu  xmm1, OWORD PTR [esi + 16]
        movdqu  xmm2, OWORD PTR [esi + 32]
        movdqu  xmm3, OWORD PTR [esi + 48]
        movdqu  OWORD PTR [edi], xmm0
        movdqu  OWORD PTR [edi + 16], xmm1
        movdqu  OWORD PTR [edi + 32], xmm2
        movdqu  OWORD PTR [edi + 48], xmm3

        add     esi, BLOCK_SIZE
        add     edi, BLOCK_SIZE
        dec     eax
        jnz     _CopyXmmBlocks

        ; Copy as many DWORDs of the tail as possible (ecx < 64 here)
_CopyRemainingDWords:
        cmp     ecx, DWORD_SIZE
        jb      _CopyRemainingBytes

        mov     eax, DWORD PTR [esi]            ; get DWORD from Source
        mov     DWORD PTR [edi], eax            ; write DWORD to Destination
        add     esi, DWORD_SIZE
        add     edi, DWORD_SIZE
        sub     ecx, DWORD_SIZE
        jmp     _CopyRemainingDWords

        ; Copy the last 0..3 bytes
_CopyRemainingBytes:
        test    ecx, ecx
        jz      _CopyMemDone
        mov     al, BYTE PTR [esi]
        mov     BYTE PTR [edi], al
        inc     esi
        inc     edi
        dec     ecx
        jmp     _CopyRemainingBytes

        ;
        ; Destination starts inside the source buffer.  Copy from the last
        ; byte down to the first so every source byte is read before it is
        ; overwritten.  This is the atypical case, so it is a plain byte loop.
        ;
        ; Invariant on entry: ecx > (Destination - Source) > 0, hence ecx != 0
        ; and the loop can test at the bottom.
        ;
_CopyOverlapped:
        lea     esi, [esi + ecx - 1]            ; last byte of Source
        lea     edi, [edi + ecx - 1]            ; last byte of Destination

_CopyOverlappedLoop:
        mov     al, BYTE PTR [esi]
        mov     BYTE PTR [edi], al
        dec     esi
        dec     edi
        dec     ecx
        jnz     _CopyOverlappedLoop

_CopyMemDone:
        pop     edi
        pop     esi

        ret

EfiCommonLibCopyMem ENDP
        END