From d39eb83cc5ce2147264e2ea3566c4dbf7eb9888d Mon Sep 17 00:00:00 2001
From: andrewfish
Date: Wed, 21 Apr 2010 22:04:35 +0000
Subject: Add some ldm/vldm optimized CopyMem routines. Add performance macros to BDS

git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@10388 6f19259b-4bc3-4df7-8a09-765794883524
---
 ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.S   | 114 ++++++++++++++++++++++
 ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.asm | 115 +++++++++++++++++++++++
 ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.S    |  80 ++++++++++++++++
 ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.asm  |  80 ++++++++++++++++
 4 files changed, 389 insertions(+)
 create mode 100755 ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.S
 create mode 100755 ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.asm
 create mode 100755 ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.S
 create mode 100755 ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.asm

(limited to 'ArmPkg/Library/BaseMemoryLibVstm/Arm')
diff --git a/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.S b/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.S
new file mode 100755
index 0000000000..2ce686c152
--- /dev/null
+++ b/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.S
@@ -0,0 +1,114 @@
+#------------------------------------------------------------------------------
+#
+# CopyMem() worker for ARM
+#
+# This file started out as C code that did 64 bit moves if the buffer was
+# 32-bit aligned, else it does a byte copy. It also does a byte copy for
+# any trailing bytes. Update to use VLDM/VSTM to do 128 byte copies.
+#
+# Copyright (c) 2008-2010 Apple Inc. All rights reserved.<br>
+# All rights reserved. This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+/**
+  Copy Length bytes from Source to Destination. Overlap is OK.
+
+  This implementation copies 128 bytes at a time with VLDM/VSTM when both buffers are 4-byte aligned and more than 127 bytes remain; otherwise it falls back to LDRD/STRD or byte copies.
+
+  @param  Destination Target of copy
+  @param  Source      Place to copy from
+  @param  Length      Number of bytes to copy
+
+  @return Destination
+
+
+VOID *
+EFIAPI
+InternalMemCopyMem (
+  OUT     VOID                      *DestinationBuffer,
+  IN      CONST VOID                *SourceBuffer,
+  IN      UINTN                     Length
+  )
+**/
+.text
+.align 2
+.globl ASM_PFX(InternalMemCopyMem)
+
+ASM_PFX(InternalMemCopyMem):
+  stmfd  sp!, {r4, r9, lr}
+  tst    r0, #3
+  mov    r4, r0
+  mov    r9, r0
+  mov    ip, r2
+  mov    lr, r1
+  movne  r0, #0
+  bne    L4
+  tst    r1, #3
+  movne  r3, #0
+  moveq  r3, #1
+  cmp    r2, #127
+  movls  r0, #0
+  andhi  r0, r3, #1
+L4:
+  cmp    r4, r1
+  bcc    L26
+  bls    L7
+  rsb    r3, r1, r4
+  cmp    ip, r3
+  bcc    L26
+  cmp    ip, #0
+  beq    L7
+  add    r9, r4, ip
+  add    lr, ip, r1
+  b      L16
+L29:
+  sub    ip, ip, #8
+  cmp    ip, #7
+  ldrd   r2, [lr, #-8]!
+  movls  r0, #0
+  cmp    ip, #0
+  strd   r2, [r9, #-8]!
+  beq    L7
+L16:
+  cmp    r0, #0
+  bne    L29
+  sub    r3, lr, #1
+  sub    ip, ip, #1
+  ldrb   r3, [r3, #0]
+  sub    r2, r9, #1
+  cmp    ip, #0
+  sub    r9, r9, #1
+  sub    lr, lr, #1
+  strb   r3, [r2, #0]
+  bne    L16
+  b      L7
+L11:
+  ldrb   r3, [lr], #1
+  sub    ip, ip, #1
+  strb   r3, [r9], #1
+L26:
+  cmp    ip, #0
+  beq    L7
+L30:
+  cmp    r0, #0
+  beq    L11
+  sub    ip, ip, #128   // consume one 128-byte block
+  cmp    ip, #127       // less than a full block left?
+  vldm   lr!, {d0-d15}
+  movls  r0, #0
+  cmp    ip, #0
+  vstm   r9!, {d0-d15}
+  bne    L30
+L7:
+  dsb
+  mov    r0, r4
+  ldmfd  sp!, {r4, r9, pc}
+
+
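
For readers who do not follow ARM assembly, the forward-copy path of InternalMemCopyMem above behaves roughly like the C sketch below. This is an illustrative model only, not code from this commit: CopyBlock128() is a made-up stand-in for the "vldm lr!, {d0-d15}" / "vstm r9!, {d0-d15}" pair (d0-d15 is sixteen 64-bit registers, hence the 128-byte block size).

    #include <stddef.h>
    #include <stdint.h>

    /* Placeholder for the vldm/vstm pair: move one 128-byte NEON block. */
    static void CopyBlock128 (uint8_t *Dst, const uint8_t *Src)
    {
      for (size_t Index = 0; Index < 128; Index++) {
        Dst[Index] = Src[Index];
      }
    }

    void *ForwardCopy (void *Destination, const void *Source, size_t Length)
    {
      uint8_t       *Dst = Destination;
      const uint8_t *Src = Source;
      /* Fast path only when both pointers are 4-byte aligned and more  */
      /* than 127 bytes remain (the "cmp r2, #127" test in the assembly). */
      int Fast = ((((uintptr_t)Dst | (uintptr_t)Src) & 3) == 0) && (Length > 127);

      while (Length != 0) {
        if (Fast) {
          CopyBlock128 (Dst, Src);
          Dst    += 128;
          Src    += 128;
          Length -= 128;
          Fast    = Length > 127;   /* finish the tail with byte copies */
        } else {
          *Dst++ = *Src++;
          Length--;
        }
      }
      return Destination;
    }

The flag the assembly keeps in r0 corresponds to Fast here: it is cleared as soon as fewer than 128 bytes remain, so the trailing bytes are always copied one at a time.
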
diff --git a/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.asm b/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.asm
new file mode 100755
index 0000000000..ce049d2eed
--- /dev/null
+++ b/ArmPkg/Library/BaseMemoryLibVstm/Arm/CopyMem.asm
@@ -0,0 +1,115 @@
+;------------------------------------------------------------------------------
+;
+; CopyMem() worker for ARM
+;
+; This file started out as C code that did 64 bit moves if the buffer was
+; 32-bit aligned, else it does a byte copy. It also does a byte copy for
+; any trailing bytes. Update to use VLDM/VSTM to do 128 byte copies.
+;
+; Copyright (c) 2008-2010 Apple Inc. All rights reserved.<br>
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+;------------------------------------------------------------------------------
+
+/**
+  Copy Length bytes from Source to Destination. Overlap is OK.
+
+  This implementation copies 128 bytes at a time with VLDM/VSTM when both buffers are 4-byte aligned and more than 127 bytes remain; otherwise it falls back to LDRD/STRD or byte copies.
+
+  @param  Destination Target of copy
+  @param  Source      Place to copy from
+  @param  Length      Number of bytes to copy
+
+  @return Destination
+
+
+VOID *
+EFIAPI
+InternalMemCopyMem (
+  OUT     VOID                      *DestinationBuffer,
+  IN      CONST VOID                *SourceBuffer,
+  IN      UINTN                     Length
+  )
+**/
+  EXPORT InternalMemCopyMem
+
+  AREA AsmMemStuff, CODE, READONLY
+
+InternalMemCopyMem
+  stmfd  sp!, {r4, r9, lr}
+  tst    r0, #3
+  mov    r4, r0
+  mov    r9, r0
+  mov    ip, r2
+  mov    lr, r1
+  movne  r0, #0
+  bne    L4
+  tst    r1, #3
+  movne  r3, #0
+  moveq  r3, #1
+  cmp    r2, #127
+  movls  r0, #0
+  andhi  r0, r3, #1
+L4
+  cmp    r4, r1
+  bcc    L26
+  bls    L7
+  rsb    r3, r1, r4
+  cmp    ip, r3
+  bcc    L26
+  cmp    ip, #0
+  beq    L7
+  add    r9, r4, ip
+  add    lr, ip, r1
+  b      L16
+L29
+  sub    ip, ip, #8
+  cmp    ip, #7
+  ldrd   r2, [lr, #-8]!
+  movls  r0, #0
+  cmp    ip, #0
+  strd   r2, [r9, #-8]!
+  beq    L7
+L16
+  cmp    r0, #0
+  bne    L29
+  sub    r3, lr, #1
+  sub    ip, ip, #1
+  ldrb   r3, [r3, #0]
+  sub    r2, r9, #1
+  cmp    ip, #0
+  sub    r9, r9, #1
+  sub    lr, lr, #1
+  strb   r3, [r2, #0]
+  bne    L16
+  b      L7
+L11
+  ldrb   r3, [lr], #1
+  sub    ip, ip, #1
+  strb   r3, [r9], #1
+L26
+  cmp    ip, #0
+  beq    L7
+L30
+  cmp    r0, #0
+  beq    L11
+  sub    ip, ip, #128   ; consume one 128-byte block
+  cmp    ip, #127       ; less than a full block left?
+  vldm   lr!, {d0-d15}
+  movls  r0, #0
+  cmp    ip, #0
+  vstm   r9!, {d0-d15}
+  bne    L30
+L7
+  dsb
+  mov    r0, r4
+  ldmfd  sp!, {r4, r9, pc}
+
+  END
+
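
The armasm (RVCT) listing above is the same routine as CopyMem.S in a different assembler syntax. The part that is easy to miss in both listings is the overlap handling at label L4: when Destination lands inside the source range, the copy runs backwards so the source bytes are not clobbered before they are read. A rough C model of that decision, illustrative only and not the shipped code:

    #include <stddef.h>
    #include <stdint.h>

    /* Model of the direction choice in InternalMemCopyMem: a descending */
    /* copy is only needed when Destination lies inside [Source, Source+Length). */
    void *OverlapAwareCopy (void *Destination, const void *Source, size_t Length)
    {
      uint8_t       *Dst = Destination;
      const uint8_t *Src = Source;

      if (Dst > Src && (size_t)(Dst - Src) < Length) {
        /* Backward copy, highest address first. */
        while (Length-- != 0) {
          Dst[Length] = Src[Length];
        }
      } else {
        /* Forward copy: byte loop or the 128-byte vldm/vstm loop. */
        for (size_t Index = 0; Index < Length; Index++) {
          Dst[Index] = Src[Index];
        }
      }
      return Destination;
    }

In the assembly, the backward path at L29 uses LDRD/STRD 8-byte moves when the alignment-and-size flag is set and single bytes otherwise; the 128-byte NEON loop is only used for forward copies.
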
diff --git a/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.S b/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.S
new file mode 100755
index 0000000000..44c6d4bece
--- /dev/null
+++ b/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.S
@@ -0,0 +1,80 @@
+#------------------------------------------------------------------------------
+#
+# SetMem() worker for ARM
+#
+# This file started out as C code that did 64 bit moves if the buffer was
+# 32-bit aligned, else it does a byte copy. It also does a byte copy for
+# any trailing bytes. Update to use VSTM/VLDM to do 128 byte writes.
+#
+# Copyright (c) 2008-2010 Apple Inc. All rights reserved.<br>
+# All rights reserved. This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+/**
+  Set Buffer to Value for Length bytes.
+
+  @param  Buffer   Memory to set.
+  @param  Length   Number of bytes to set
+  @param  Value    Value of the set operation.
+
+  @return Buffer
+
+VOID *
+EFIAPI
+InternalMemSetMem (
+  OUT     VOID                      *Buffer,
+  IN      UINTN                     Length,
+  IN      UINT8                     Value
+  )
+**/
+
+.text
+.align 2
+.globl ASM_PFX(InternalMemSetMem)
+
+ASM_PFX(InternalMemSetMem):
+  stmfd   sp!, {lr}
+  tst     r0, #3
+  movne   r3, #0
+  moveq   r3, #1
+  cmp     r1, #127
+  movls   lr, #0
+  andhi   lr, r3, #1
+  cmp     lr, #0
+  mov     r12, r0
+  bne     L31
+L32:
+  mov     r3, #0
+  b       L43
+L31:
+  vdup.8  q0, r2
+  vmov    q1, q0
+  vmov    q2, q0
+  vmov    q3, q0
+  vmov    q4, q0
+  vmov    q5, q0
+  vmov    q6, q0
+  vmov    q7, q0
+  b       L32
+L34:
+  cmp     lr, #0
+  streqb  r2, [r12], #1
+  subeq   r1, r1, #1
+  beq     L43
+  sub     r1, r1, #128  // consume one 128-byte block
+  cmp     r1, #31       // stale check from the 32-byte version (flags unused)
+  cmp     r1, #127      // drop to the byte loop when < 128 bytes remain
+  movls   lr, r3
+  vstm    r12!, {d0-d15}
+L43:
+  cmp     r1, #0
+  bne     L34
+  ldmfd   sp!, {pc}
+
\ No newline at end of file
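
InternalMemSetMem above follows the same pattern as the copy routine: the vdup.8/vmov sequence replicates Value into q0-q7 (that is, d0-d15), so each vstm writes 128 identical bytes. A rough C model, again with an invented StoreBlock128() helper standing in for the NEON store and not taken from this commit:

    #include <stddef.h>
    #include <stdint.h>

    /* Placeholder for "vstm r12!, {d0-d15}" once q0-q7 hold Value. */
    static void StoreBlock128 (uint8_t *Dst, uint8_t Value)
    {
      for (size_t Index = 0; Index < 128; Index++) {
        Dst[Index] = Value;
      }
    }

    void *SetMemSketch (void *Buffer, size_t Length, uint8_t Value)
    {
      uint8_t *Dst = Buffer;
      /* 128-byte stores only when the buffer is 4-byte aligned and more */
      /* than 127 bytes remain; otherwise fall back to byte stores.      */
      int Fast = (((uintptr_t)Dst & 3) == 0) && (Length > 127);

      while (Length != 0) {
        if (Fast) {
          StoreBlock128 (Dst, Value);
          Dst    += 128;
          Length -= 128;
          Fast    = Length > 127;
        } else {
          *Dst++ = Value;
          Length--;
        }
      }
      return Buffer;
    }
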
diff --git a/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.asm b/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.asm
new file mode 100755
index 0000000000..4e16ac61bf
--- /dev/null
+++ b/ArmPkg/Library/BaseMemoryLibVstm/Arm/SetMem.asm
@@ -0,0 +1,80 @@
+;------------------------------------------------------------------------------
+;
+; SetMem() worker for ARM
+;
+; This file started out as C code that did 64 bit moves if the buffer was
+; 32-bit aligned, else it does a byte copy. It also does a byte copy for
+; any trailing bytes. Update to use VSTM/VLDM to do 128 byte writes.
+;
+; Copyright (c) 2008-2010 Apple Inc. All rights reserved.<br>
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+
+/**
+  Set Buffer to Value for Length bytes.
+
+  @param  Buffer   Memory to set.
+  @param  Length   Number of bytes to set
+  @param  Value    Value of the set operation.
+
+  @return Buffer
+
+VOID *
+EFIAPI
+InternalMemSetMem (
+  OUT     VOID                      *Buffer,
+  IN      UINTN                     Length,
+  IN      UINT8                     Value
+  )
+**/
+
+  EXPORT InternalMemSetMem
+
+  AREA AsmMemStuff, CODE, READONLY
+
+InternalMemSetMem
+  stmfd   sp!, {lr}
+  tst     r0, #3
+  movne   r3, #0
+  moveq   r3, #1
+  cmp     r1, #127
+  movls   lr, #0
+  andhi   lr, r3, #1
+  cmp     lr, #0
+  mov     r12, r0
+  bne     L31
+L32
+  mov     r3, #0
+  b       L43
+L31
+  vdup.8  q0, r2
+  vmov    q1, q0
+  vmov    q2, q0
+  vmov    q3, q0
+  vmov    q4, q0
+  vmov    q5, q0
+  vmov    q6, q0
+  vmov    q7, q0
+  b       L32
+L34
+  cmp     lr, #0
+  streqb  r2, [r12], #1
+  subeq   r1, r1, #1
+  beq     L43
+  sub     r1, r1, #128  ; consume one 128-byte block
+  cmp     r1, #127      ; drop to the byte loop when < 128 bytes remain
+  movls   lr, r3
+  vstm    r12!, {d0-d15}
+L43
+  cmp     r1, #0
+  bne     L34
+  ldmfd   sp!, {pc}
+
+  END
\ No newline at end of file
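
These workers sit behind the standard BaseMemoryLib interface, so client code keeps calling CopyMem() and SetMem() declared in MdePkg's Library/BaseMemoryLib.h; only the library instance a platform links changes. A minimal smoke test, assuming the platform DSC resolves BaseMemoryLib to this Vstm instance; the test function itself is hypothetical and not part of the commit:

    #include <Uefi.h>
    #include <Library/BaseMemoryLib.h>
    #include <Library/DebugLib.h>

    //
    // Hypothetical smoke test: exercises the 128-byte VSTM path, the
    // overlapping (backward) copy path, and the short byte-copy path.
    //
    VOID
    MemLibSmokeTest (
      VOID
      )
    {
      STATIC UINT8  Buffer[512];
      UINTN         Index;

      // 128-byte-block path: 4-byte aligned buffer, length > 127.
      SetMem (Buffer, sizeof (Buffer), 0xA5);
      for (Index = 0; Index < sizeof (Buffer); Index++) {
        ASSERT (Buffer[Index] == 0xA5);
      }

      // Overlapping copy: destination overlaps the tail of the source,
      // which forces the descending copy path in InternalMemCopyMem.
      for (Index = 0; Index < 256; Index++) {
        Buffer[Index] = (UINT8)Index;
      }
      CopyMem (&Buffer[100], &Buffer[0], 200);
      ASSERT (Buffer[100] == 0 && Buffer[299] == 199);

      // Short, unaligned copy: takes the byte loop.
      CopyMem (&Buffer[1], &Buffer[301], 7);
      ASSERT (Buffer[1] == 0xA5 && Buffer[7] == 0xA5);
    }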