Diffstat (limited to 'Core/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S')
-rw-r--r--  Core/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S  175
1 file changed, 175 insertions, 0 deletions
diff --git a/Core/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S b/Core/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
new file mode 100644
index 0000000000..195a0b23f7
--- /dev/null
+++ b/Core/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
@@ -0,0 +1,175 @@
+#------------------------------------------------------------------------------
+#
+# CopyMem() worker for ARM
+#
+# This file started out as C code that did 64-bit moves if the buffer was
+# 32-bit aligned, and a byte copy otherwise, plus a byte copy for any
+# trailing bytes. It was later updated to copy 32 bytes at a time using
+# ldm/stm.
+#
+# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
+# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+ .text
+ .thumb
+ .syntax unified
+
+/**
+ Copy Length bytes from Source to Destination. Overlap is OK.
+
+ This implementation copies 32 bytes at a time with ldm/stm when at least
+ 32 bytes remain and the pointers are 16-byte aligned, and falls back to a
+ byte copy otherwise. When the buffers overlap with Destination above
+ Source, the copy is done backwards, starting from the end of the buffers.
+
+ @param DestinationBuffer  Target of the copy
+ @param SourceBuffer       Place to copy from
+ @param Length             Number of bytes to copy
+
+ @return DestinationBuffer
+
+
+VOID *
+EFIAPI
+InternalMemCopyMem (
+ OUT VOID *DestinationBuffer,
+ IN CONST VOID *SourceBuffer,
+ IN UINTN Length
+ )
+**/
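+
+// The following C model is an illustrative sketch (not part of the build) of
+// the logic implemented by the assembly below. The helper name
+// InternalMemCopyMemModel is hypothetical, and the 32-byte ldm/stm fast path
+// (taken when at least 32 bytes remain and the pointers are 16-byte aligned)
+// is collapsed into a plain byte loop here for clarity.
+//
+//   VOID *
+//   InternalMemCopyMemModel (
+//     OUT VOID        *DestinationBuffer,
+//     IN  CONST VOID  *SourceBuffer,
+//     IN  UINTN       Length
+//     )
+//   {
+//     UINT8        *Dst = DestinationBuffer;
+//     CONST UINT8  *Src = SourceBuffer;
+//
+//     if ((Dst >= Src) && (Length >= (UINTN)(Dst - Src))) {
+//       // Destination at or above Source and within Length bytes of it:
+//       // copy backwards so overlapping bytes are not clobbered.
+//       while (Length-- != 0) {
+//         Dst[Length] = Src[Length];
+//       }
+//     } else {
+//       // No harmful overlap: copy forwards.
+//       while (Length-- != 0) {
+//         *Dst++ = *Src++;
+//       }
+//     }
+//     return DestinationBuffer;
+//   }
+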
+ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
+ASM_PFX(InternalMemCopyMem):
+ push {r4-r11, lr}
+ // Save the input parameters in extra registers
+ // (r11 = destination / return value, r10 = destination, r14 = source, r12 = length).
+ // r14 (lr) is safe to clobber: it was pushed above and the function returns
+ // by popping straight into pc.
+ mov r11, r0
+ mov r10, r0
+ mov r12, r2
+ mov r14, r1
+
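+ // Decide the copy direction: a backward copy is used when the destination
+ // lies at or above the source and within Length bytes of it; otherwise a
+ // forward copy is safe.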
+ cmp r11, r1
+ // If (dest < source)
+ bcc memcopy_check_optim_default
+
+ // If (source + length < dest)
+ rsb r3, r1, r11
+ cmp r12, r3
+ bcc memcopy_check_optim_default
+ b memcopy_check_optim_overlap
+
+memcopy_check_optim_default:
+ // Check whether we can use the optimized path for the memcopy:
+ // ((length >= 32) && destination 16-byte aligned && source 16-byte aligned).
+ // The optimized path is taken below when r0 == 1.
+ tst r0, #0xF
+ it ne
+ movne.n r0, #0
+ bne memcopy_default
+ tst r1, #0xF
+ it ne
+ movne.n r3, #0
+ it eq
+ moveq.n r3, #1
+ cmp r2, #31
+ it ls
+ movls.n r0, #0
+ bls memcopy_default
+ and r0, r3, #1
+ b memcopy_default
+
+memcopy_check_optim_overlap:
+ // r10 = dest_end, r14 = source_end
+ add r10, r11, r12
+ add r14, r12, r1
+
+ // Are we in the optimized case?
+ // ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
+ cmp r2, #31
+ it ls
+ movls.n r0, #0
+ it hi
+ movhi.n r0, #1
+ tst r10, #0xF
+ it ne
+ movne.n r0, #0
+ tst r14, #0xF
+ it ne
+ movne.n r0, #0
+ b memcopy_overlapped
+
+memcopy_overlapped_non_optim:
+ // We read 1 byte from the end of the source buffer
+ sub r3, r14, #1
+ sub r12, r12, #1
+ ldrb r3, [r3, #0]
+ sub r2, r10, #1
+ cmp r12, #0
+ // We write 1 byte at the end of the dest buffer
+ sub r10, r10, #1
+ sub r14, r14, #1
+ strb r3, [r2, #0]
+ bne memcopy_overlapped_non_optim
+ b memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped:
+ // Are we in the optimized case ?
+ cmp r0, #0
+ beq memcopy_overlapped_non_optim
+
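+ // Copy backwards in 32-byte blocks: the end pointers are decremented before
+ // each block, so ldmia/stmia are used without writeback.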
+ // Optimized Overlapped - Read 32 bytes
+ sub r14, r14, #32
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14, {r2-r9}
+
+ // If length is less than 32 then disable optim
+ it ls
+ movls.n r0, #0
+
+ cmp r12, #0
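+ // (sub and stmia leave the flags untouched, so the bne below still tests
+ //  this compare)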
+
+ // Optimized Overlapped - Write 32 bytes
+ sub r10, r10, #32
+ stmia r10, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_overlapped
+ b memcopy_end
+
+memcopy_default_non_optim:
+ // Byte copy
+ ldrb r3, [r14], #1
+ sub r12, r12, #1
+ strb r3, [r10], #1
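+ // Fall through into memcopy_default to re-test the remaining length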
+
+memcopy_default:
+ cmp r12, #0
+ beq memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop:
+ cmp r0, #0
+ beq memcopy_default_non_optim
+
+ // Optimized memcopy - Read 32 Bytes
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14!, {r2-r9}
+
+ // If length is less than 32 then disable optim
+ it ls
+ movls.n r0, #0
+
+ cmp r12, #0
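+ // (stmia leaves the flags untouched, so the bne below still tests this compare)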
+
+ // Optimized memcopy - Write 32 Bytes
+ stmia r10!, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_default_loop
+
+memcopy_end:
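+ // Return the original destination pointer saved in r11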
+ mov r0, r11
+ pop {r4-r11, pc}