summaryrefslogtreecommitdiff
path: root/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm
diff options
context:
space:
mode:
Diffstat (limited to 'EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm')
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm183
1 files changed, 183 insertions, 0 deletions
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm
new file mode 100644
index 0000000..504c08a
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm
@@ -0,0 +1,183 @@
+ TITLE EfiCopyMem.asm: Optimized memory-copy routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiCopyMem.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized CopyMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P                  ; accept the Pentium Pro instruction set, including privileged instructions
+ .MMX                   ; accept MMX instructions (the movq/emms used by the block-copy loop below)
+ .MODEL SMALL           ; select the small memory model
+ .CODE                  ; everything that follows is emitted into the code segment
+
+EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD ; prototype: C calling convention, three stack arguments
+
+;------------------------------------------------------------------------------
+; VOID
+; EfiCommonLibCopyMem (
+;   IN VOID   *Destination,
+;   IN VOID   *Source,
+;   IN UINTN  Count
+;   )
+;
+; Copies Count bytes from Source to Destination. The two ranges may overlap;
+; the copy direction is chosen so that no source byte is overwritten before
+; it has been read.
+;
+; Convention : C (arguments on the stack, nothing returned).
+; Preserved  : esi, edi (saved/restored below).
+; Clobbered  : eax, ecx, flags.
+; MMX        : used only when at least 64 bytes remain once Destination is
+;              4-byte aligned. mm0 is then saved and restored around the
+;              loop and EMMS is executed before leaving the MMX section.
+;
+; Strategy:
+;   Source == Destination                 -> nothing to do
+;   0 < Destination - Source < Count      -> byte-wise copy, last byte first
+;   anything else                         -> forward copy:
+;                                              bytes until Destination is
+;                                              4-byte aligned, 64-byte MMX
+;                                              blocks, dwords, trailing bytes
+;------------------------------------------------------------------------------
+
+EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+    LOCAL   MmxSave:QWORD                   ; stack slot that holds the caller's mm0
+
+    push    esi
+    push    edi
+    mov     ecx, Count                      ; ecx = bytes still to copy
+    mov     esi, Source                     ; esi = read pointer
+    mov     edi, Destination                ; edi = write pointer
+
+    ;
+    ; Pick the copy direction.
+    ;
+    ; A forward copy is only unsafe when Destination starts inside the source
+    ; range, i.e. when 0 < (Destination - Source) < Count. The difference is
+    ; taken modulo 2^32 and compared unsigned, so the test cannot be fooled by
+    ; a buffer that ends exactly at the top of the address space (computing
+    ; Source + Count or Destination + Count would wrap to 0 in that case and
+    ; misclassify an overlapping copy as a disjoint one).
+    ;
+    mov     eax, edi
+    sub     eax, esi                        ; eax = Destination - Source (mod 2^32)
+    jz      _CopyMemDone                    ; identical pointers: nothing to move
+    cmp     eax, ecx
+    jb      _CopyOverlapped                 ; Destination lies inside [Source, Source + Count)
+
+    ;
+    ; Forward copy, stage 1: single bytes until the destination pointer is
+    ; 4-byte aligned (or Count runs out).
+    ;
+_StartByteCopy:
+    test    ecx, ecx
+    jz      _CopyMemDone                    ; Count exhausted, all done
+    test    edi, 3                          ; low two address bits clear?
+    jz      _CopyBlocks                     ; yes: destination is aligned
+
+    mov     al, BYTE PTR [esi]              ; fetch one byte from Source
+    mov     BYTE PTR [edi], al              ; store it to Destination
+    inc     esi
+    inc     edi
+    dec     ecx
+    jmp     _StartByteCopy
+
+    ;
+    ; Forward copy, stage 2: whole 64-byte blocks through mm0.
+    ;
+_CopyBlocks:
+    mov     eax, ecx
+    and     ecx, 63                         ; ecx = bytes left over after the 64-byte blocks
+    shr     eax, 6                          ; eax = number of 64-byte blocks
+    jz      _CopyRemainingDWords            ; fewer than 64 bytes: skip the MMX loop
+
+    movq    MmxSave, mm0                    ; keep the caller's mm0 contents
+
+_CopyMmxLoop:
+    ; Eight 8-byte load/store pairs move one 64-byte block.
+    movq    mm0, QWORD PTR ds:[esi]
+    movq    QWORD PTR ds:[edi], mm0
+    movq    mm0, QWORD PTR ds:[esi+8]
+    movq    QWORD PTR ds:[edi+8], mm0
+    movq    mm0, QWORD PTR ds:[esi+16]
+    movq    QWORD PTR ds:[edi+16], mm0
+    movq    mm0, QWORD PTR ds:[esi+24]
+    movq    QWORD PTR ds:[edi+24], mm0
+    movq    mm0, QWORD PTR ds:[esi+32]
+    movq    QWORD PTR ds:[edi+32], mm0
+    movq    mm0, QWORD PTR ds:[esi+40]
+    movq    QWORD PTR ds:[edi+40], mm0
+    movq    mm0, QWORD PTR ds:[esi+48]
+    movq    QWORD PTR ds:[edi+48], mm0
+    movq    mm0, QWORD PTR ds:[esi+56]
+    movq    QWORD PTR ds:[edi+56], mm0
+
+    add     esi, 64
+    add     edi, 64
+    dec     eax                             ; one block fewer to go
+    jnz     _CopyMmxLoop
+
+    movq    mm0, MmxSave                    ; put the caller's mm0 contents back
+    emms                                    ; leave MMX state before returning to non-MMX code
+
+    ;
+    ; Forward copy, stage 3: as many dwords as still fit.
+    ;
+_CopyRemainingDWords:
+    cmp     ecx, 4
+    jb      _CopyRemainingBytes             ; fewer than 4 bytes left
+
+    mov     eax, DWORD PTR [esi]            ; fetch one dword from Source
+    mov     DWORD PTR [edi], eax            ; store it to Destination
+    add     esi, 4
+    add     edi, 4
+    sub     ecx, 4
+    jmp     _CopyRemainingDWords
+
+    ;
+    ; Forward copy, stage 4: the last 0..3 bytes.
+    ;
+_CopyRemainingBytes:
+    test    ecx, ecx
+    jz      _CopyMemDone
+    mov     al, BYTE PTR [esi]              ; fetch one byte from Source
+    mov     BYTE PTR [edi], al              ; store it to Destination
+    inc     esi
+    inc     edi
+    dec     ecx
+    jmp     _CopyRemainingBytes
+
+    ;
+    ; Backward copy, used when Destination starts inside the source range.
+    ; Copying from the last byte down to the first guarantees every source
+    ; byte is read before it is overwritten. This is the atypical case, so it
+    ; is kept as a plain byte loop.
+    ;
+_CopyOverlapped:
+    lea     esi, [esi+ecx-1]                ; esi -> last byte of Source
+    lea     edi, [edi+ecx-1]                ; edi -> last byte of Destination
+
+_CopyOverlappedLoop:
+    test    ecx, ecx
+    jz      _CopyMemDone
+    mov     al, BYTE PTR [esi]              ; fetch one byte from Source
+    mov     BYTE PTR [edi], al              ; store it to Destination
+    dec     esi
+    dec     edi
+    dec     ecx
+    jmp     _CopyOverlappedLoop
+
+_CopyMemDone:
+    pop     edi
+    pop     esi
+
+    ret
+
+EfiCommonLibCopyMem ENDP
+ END