diff options
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S | 90 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/X64/ZeroMem.S | 5 |
2 files changed, 52 insertions, 43 deletions
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S index 426a602286..8fbc90f0e4 100644 --- a/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S +++ b/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S @@ -24,7 +24,6 @@ # #------------------------------------------------------------------------------ - #------------------------------------------------------------------------------ # VOID * # EFIAPI @@ -33,43 +32,52 @@ # IN VOID *Source, # IN UINTN Count # ) -#------------------------------------------------------------------------------ -.intel_syntax noprefix -.globl ASM_PFX(InternalMemCopyMem) -ASM_PFX(InternalMemCopyMem): - push rsi - push rdi - mov rsi, rdx # rsi <- Source - mov rdi, rcx # rdi <- Destination - lea r9, [rsi + r8 - 1] # r9 <- End of Source - cmp rsi, rdi - mov rax, rdi # rax <- Destination as return value - jae L0 - cmp r9, rdi - jae L_CopyBackward # Copy backward if overlapped -L0: - mov rcx, r8 - and r8, 7 - shr rcx, 3 # rcx <- # of Qwords to copy - jz L_CopyBytes - movd r10, mm0 # (Save mm0 in r10) -L1: - movq mm0, [rsi] - movntq [rdi], mm0 - add rsi, 8 - add rdi, 8 - loop L1 - mfence - movd mm0, r10 # (Restore mm0) - jmp L_CopyBytes -L_CopyBackward: - mov rsi, r9 # rsi <- End of Source - lea rdi, [rdi + r8 - 1] # rdi <- End of Destination - std # set direction flag -L_CopyBytes: - mov rcx, r8 - rep movsb # Copy bytes backward - cld - pop rdi - pop rsi - ret +#------------------------------------------------------------------------------
+.intel_syntax noprefix
+.globl ASM_PFX(InternalMemCopyMem)
+ASM_PFX(InternalMemCopyMem):
+ push rsi
+ push rdi
+ mov rsi, rdx # rsi <- Source
+ mov rdi, rcx # rdi <- Destination
+ lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source
+ cmp rsi, rdi
+ mov rax, rdi # rax <- Destination as return value
+ jae L0 # Copy forward if Source >= Destination
+ cmp r9, rdi # Overlapped?
+ jae L_CopyBackward # Copy backward if overlapped
+L0:
+ xor rcx, rcx
+ sub rcx, rdi # rcx <- -rdi
+ and rcx, 15 # rcx + rdi should be 16 bytes aligned
+ jz L1 # skip if rcx == 0 (rdi already aligned)
+ cmp rcx, r8
+ cmova rcx, r8 # rcx <- min(rcx, Count), unsigned
+ sub r8, rcx # r8 <- bytes left after the alignment copy
+ rep movsb # copy leading bytes so rdi becomes aligned
+L1:
+ mov rcx, r8
+ and r8, 15 # r8 <- # of trailing bytes after DQwords
+ shr rcx, 4 # rcx <- # of DQwords to copy
+ jz L_CopyBytes
+ movdqa [rsp + 0x18], xmm0 # save xmm0 on stack
+L2:
+ movdqu xmm0, [rsi] # rsi may not be 16-byte aligned
+ movntdq [rdi], xmm0 # rdi should be 16-byte aligned
+ add rsi, 16
+ add rdi, 16
+ loop L2
+ mfence # order the non-temporal stores
+ movdqa xmm0, [rsp + 0x18] # restore xmm0
+ jmp L_CopyBytes # copy remaining bytes
+L_CopyBackward:
+ mov rsi, r9 # rsi <- Last byte of Source
+ lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination
+ std # set direction flag: movsb walks downward
+L_CopyBytes:
+ mov rcx, r8 # rcx <- # of bytes still to copy
+ rep movsb # forward, or backward if std was executed
+ cld # leave direction flag cleared
+ pop rdi
+ pop rsi
+ ret
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/X64/ZeroMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/X64/ZeroMem.S index 97c3130709..9c6be9c97e 100644 --- a/MdePkg/Library/BaseMemoryLibOptDxe/X64/ZeroMem.S +++ b/MdePkg/Library/BaseMemoryLibOptDxe/X64/ZeroMem.S @@ -41,9 +41,10 @@ ASM_PFX(InternalMemZeroMem): mov rdi, rcx mov rcx, rdx shr rcx, 3 - and rdx, 7 + and rdx, 7
+ cld rep stosq - mov ecx, edx + mov rcx, rdx rep stosb pop rax pop rdi |