summary | refs | log | tree | commit | diff
path: root/EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c
diff options
context:
space:
mode:
Diffstat (limited to 'EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c')
-rw-r--r-- EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c | 169
1 files changed, 169 insertions, 0 deletions
diff --git a/EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c b/EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c
new file mode 100644
index 0000000000..e3b23f92a0
--- /dev/null
+++ b/EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c
@@ -0,0 +1,169 @@
+/*++
+
+Copyright (c) 2006, Intel Corporation
+All rights reserved. This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution. The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+Module Name:
+
+ EfiCopyMemSSE2.c
+
+Abstract:
+
+ This is the code that supports IA32-optimized CopyMem service
+
+--*/
+
+#include "Tiano.h"
+
+VOID
+EfiCommonLibCopyMem (
+ IN VOID *Destination,
+ IN VOID *Source,
+ IN UINTN Count
+ )
+/*++
+
+Routine Description:
+
+ Copy Length bytes from Source to Destination.
+
+Arguments:
+
+ Destination - Target of copy
+
+ Source - Place to copy from
+
+ Length - Number of bytes to copy
+
+Returns:
+
+ None
+
+--*/
+{
+ __asm {
+ mov ecx, Count
+ mov esi, Source
+ mov edi, Destination
+
+ ; First off, make sure we have no overlap. That is to say,
+ ; if (Source == Destination) => do nothing
+ ; if (Source + Count <= Destination) => regular copy
+ ; if (Destination + Count <= Source) => regular copy
+ ; otherwise, do a reverse copy
+ mov eax, esi
+ add eax, ecx ; Source + Count
+ cmp eax, edi
+ jle _StartByteCopy
+
+ mov eax, edi
+ add eax, ecx ; Dest + Count
+ cmp eax, esi
+ jle _StartByteCopy
+
+ cmp esi, edi
+ je _CopyMemDone
+ jl _CopyOverlapped ; too bad -- overlaps
+
+ ; Pick up misaligned start bytes to get destination pointer 4-byte aligned
+_StartByteCopy:
+ cmp ecx, 0
+ je _CopyMemDone ; Count == 0, all done
+ mov edx, edi
+ and dl, 3 ; check lower 2 bits of address
+ test dl, dl
+ je SHORT _CopyBlocks ; already aligned?
+
+ ; Copy a byte
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ inc edi
+ inc esi
+ jmp _StartByteCopy ; back to top of loop
+
+_CopyBlocks:
+ ; Compute how many 64-byte blocks we can clear
+ mov eax, ecx ; get Count in eax
+ shr eax, 6 ; convert to 64-byte count
+ shl eax, 6 ; convert back to bytes
+ sub ecx, eax ; subtract from the original count
+ shr eax, 6 ; and this is how many 64-byte blocks
+
+ ; If no 64-byte blocks, then skip
+ cmp eax, 0
+ je _CopyRemainingDWords
+
+
+copyxmm:
+
+ movdqu xmm0, OWORD PTR ds:[esi]
+ movdqu QWORD PTR ds:[edi], xmm0
+ movdqu xmm1, OWORD PTR ds:[esi+16]
+ movdqu QWORD PTR ds:[edi+16], xmm1
+ movdqu xmm2, OWORD PTR ds:[esi+32]
+ movdqu QWORD PTR ds:[edi+32], xmm2
+ movdqu xmm3, OWORD PTR ds:[esi+48]
+ movdqu QWORD PTR ds:[edi+48], xmm3
+
+ add edi, 64
+ add esi, 64
+ dec eax
+ jnz copyxmm
+
+
+ ; Copy as many DWORDS as possible
+_CopyRemainingDWords:
+ cmp ecx, 4
+ jb _CopyRemainingBytes
+
+ mov eax, DWORD PTR [esi] ; get data from Source
+ mov DWORD PTR [edi], eax ; write byte to Destination
+ sub ecx, 4 ; decrement Count
+ add esi, 4 ; advance Source pointer
+ add edi, 4 ; advance Destination pointer
+ jmp _CopyRemainingDWords ; back to top
+
+_CopyRemainingBytes:
+ cmp ecx, 0
+ je _CopyMemDone
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ inc esi
+ inc edi ; advance Destination pointer
+ jmp SHORT _CopyRemainingBytes ; back to top of loop
+
+ ;
+ ; We do this block if the source and destination buffers overlap. To
+ ; handle it, copy starting at the end of the source buffer and work
+ ; your way back. Since this is the atypical case, this code has not
+ ; been optimized, and thus simply copies bytes.
+ ;
+_CopyOverlapped:
+
+ ; Move the source and destination pointers to the end of the range
+ add esi, ecx ; Source + Count
+ dec esi
+ add edi, ecx ; Dest + Count
+ dec edi
+
+_CopyOverlappedLoop:
+ cmp ecx, 0
+ je _CopyMemDone
+ mov al, BYTE PTR [esi] ; get byte from Source
+ mov BYTE PTR [edi], al ; write byte to Destination
+ dec ecx
+ dec esi
+ dec edi
+ jmp _CopyOverlappedLoop ; back to top of loop
+
+_CopyMemDone:
+ }
+}