summaryrefslogtreecommitdiff
path: root/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm')
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm169
1 files changed, 169 insertions, 0 deletions
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm
new file mode 100644
index 0000000..d5000d0
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm
@@ -0,0 +1,169 @@
+ TITLE EfiCopyMem.asm: Optimized memory-copy routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiCopyMem.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized CopyMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+
+;------------------------------------------------------------------------------
+; VOID
+; EfiCommonLibCopyMem (
+; IN VOID *Destination,
+; IN VOID *Source,
+; IN UINTN Count
+; )
+;------------------------------------------------------------------------------
+
+EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+
+ ; Put source and destination pointers in esi/edi
+ push esi
+ push edi
+ mov ecx, Count
+ mov esi, Source
+ mov edi, Destination
+
+ ; First off, make sure we have no overlap. That is to say,
+ ; if (Source == Destination) => do nothing
+ ; if (Source + Count <= Destination) => regular copy
+ ; if (Destination + Count <= Source) => regular copy
+ ; otherwise, do a reverse copy
+ mov eax, esi
+ add eax, ecx ; Source + Count
+ cmp eax, edi
+ jle _StartByteCopy
+
+ mov eax, edi
+ add eax, ecx ; Dest + Count
+ cmp eax, esi
+ jle _StartByteCopy
+
+ cmp esi, edi
+ je _CopyMemDone
+ jl _CopyOverlapped ; too bad -- overlaps
+
+ ; Pick up misaligned start bytes to get destination pointer 4-byte aligned
+_StartByteCopy:
+ cmp ecx, 0
+ je _CopyMemDone ; Count == 0, all done
+ mov edx, edi
+ and dl, 3 ; check lower 2 bits of address
+ test dl, dl
+ je SHORT _CopyBlocks ; already aligned?
+
+ ; Copy a byte
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ inc edi
+ inc esi
+ jmp _StartByteCopy ; back to top of loop
+
+_CopyBlocks:
+ ; Compute how many 64-byte blocks we can clear
+ mov eax, ecx ; get Count in eax
+ shr eax, 6 ; convert to 64-byte count
+ shl eax, 6 ; convert back to bytes
+ sub ecx, eax ; subtract from the original count
+ shr eax, 6 ; and this is how many 64-byte blocks
+
+ ; If no 64-byte blocks, then skip
+ cmp eax, 0
+ je _CopyRemainingDWords
+
+
+copyxmm:
+
+ movdqu xmm0, OWORD PTR ds:[esi]
+ movdqu OWORD PTR ds:[edi], xmm0
+ movdqu xmm1, OWORD PTR ds:[esi+16]
+ movdqu OWORD PTR ds:[edi+16], xmm1
+ movdqu xmm2, OWORD PTR ds:[esi+32]
+ movdqu OWORD PTR ds:[edi+32], xmm2
+ movdqu xmm3, OWORD PTR ds:[esi+48]
+ movdqu OWORD PTR ds:[edi+48], xmm3
+
+ add edi, 64
+ add esi, 64
+ dec eax
+ jnz copyxmm
+
+
+ ; Copy as many DWORDS as possible
+_CopyRemainingDWords:
+ cmp ecx, 4
+ jb _CopyRemainingBytes
+
+ mov eax, DWORD PTR [esi] ; get data from Source
+ mov DWORD PTR [edi], eax ; write byte to Destination
+ sub ecx, 4 ; decrement Count
+ add esi, 4 ; advance Source pointer
+ add edi, 4 ; advance Destination pointer
+ jmp _CopyRemainingDWords ; back to top
+
+_CopyRemainingBytes:
+ cmp ecx, 0
+ je _CopyMemDone
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ inc esi
+ inc edi ; advance Destination pointer
+ jmp SHORT _CopyRemainingBytes ; back to top of loop
+
+ ;
+ ; We do this block if the source and destination buffers overlap. To
+ ; handle it, copy starting at the end of the source buffer and work
+ ; your way back. Since this is the atypical case, this code has not
+ ; been optimized, and thus simply copies bytes.
+ ;
+_CopyOverlapped:
+
+ ; Move the source and destination pointers to the end of the range
+ add esi, ecx ; Source + Count
+ dec esi
+ add edi, ecx ; Dest + Count
+ dec edi
+
+_CopyOverlappedLoop:
+ cmp ecx, 0
+ je _CopyMemDone
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ dec esi
+ dec edi
+ jmp _CopyOverlappedLoop ; back to top of loop
+
+_CopyMemDone:
+ pop edi
+ pop esi
+
+ ret
+
+EfiCommonLibCopyMem ENDP
+ END