summary refs log tree commit diff
path: root/ArmPkg
diff options
context:
space:
mode:
Diffstat (limited to 'ArmPkg')
-rwxr-xr-x ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S 153
-rwxr-xr-x ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm 153
2 files changed, 212 insertions, 94 deletions
diff --git a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S
index 1298f94e8d..4e0122bdec 100755
--- a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S
+++ b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S
@@ -43,70 +43,129 @@ GCC_ASM_EXPORT(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
stmfd sp!, {r4-r11, lr}
- tst r0, #3
+ // Save the input parameters in extra registers (r11 = destination, kept for the return value; r10 = working destination pointer; r14 = source; r12 = length)
mov r11, r0
mov r10, r0
- mov ip, r2
- mov lr, r1
+ mov r12, r2
+ mov r14, r1
+
+memcopy_check_overlapped:
+ cmp r11, r1
+ // If (dest < source)
+ bcc memcopy_check_optim_default
+ // bls is taken when (dest <= source); (dest < source) was already handled above, so here (dest == source) and there is nothing to copy
+ bls memcopy_end
+
+ // If (source + length < dest)
+ rsb r3, r1, r11
+ cmp r12, r3
+ bcc memcopy_check_optim_default
+
+ // If (length == 0)
+ cmp r12, #0
+ beq memcopy_end
+
+ b memcopy_check_optim_overlap
+
+memcopy_check_optim_default:
+ // Check if we can use the optimized path for the memcopy: (length >= 32) && destination 16-byte aligned && source 16-byte aligned (optimized path if r0 == 1)
+ tst r0, #0xF
movne r0, #0
- bne L4
- tst r1, #3
+ bne memcopy_default
+ tst r1, #0xF
movne r3, #0
moveq r3, #1
cmp r2, #31
movls r0, #0
andhi r0, r3, #1
-L4:
- cmp r11, r1
- bcc L26
- bls L7
- rsb r3, r1, r11
- cmp ip, r3
- bcc L26
- cmp ip, #0
- beq L7
- add r10, r11, ip
- add lr, ip, r1
- b L16
-L29:
- sub ip, ip, #8
- cmp ip, #7
- ldrd r2, [lr, #-8]!
+ b memcopy_default
+
+memcopy_check_optim_overlap:
+ // r10 = dest_end, r14 = source_end
+ add r10, r11, r12
+ add r14, r12, r1
+
+ // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)?
+ cmp r2, #31
movls r0, #0
- cmp ip, #0
- strd r2, [r10, #-8]!
- beq L7
-L16:
- cmp r0, #0
- bne L29
- sub r3, lr, #1
- sub ip, ip, #1
+ movhi r0, #1
+ tst r10, #0xF
+ movne r0, #0
+ tst r14, #0xF
+ movne r0, #0
+ b memcopy_overlapped
+
+memcopy_overlapped_non_optim:
+ // We read 1 byte from the end of the source buffer
+ sub r3, r14, #1
+ sub r12, r12, #1
ldrb r3, [r3, #0]
sub r2, r10, #1
- cmp ip, #0
+ cmp r12, #0
+ // We write 1 byte at the end of the dest buffer
sub r10, r10, #1
- sub lr, lr, #1
+ sub r14, r14, #1
strb r3, [r2, #0]
- bne L16
- b L7
-L11:
- ldrb r3, [lr], #1
- sub ip, ip, #1
+ bne memcopy_overlapped_non_optim
+ b memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped:
+ // Are we in the optimized case ?
+ cmp r0, #0
+ beq memcopy_overlapped_non_optim
+
+ // Optimized Overlapped - Read 32 bytes
+ sub r14, r14, #32
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14, {r2-r9}
+
+ // If the remaining length (r12) is now less than 32, disable the optimized path for the next iteration
+ movls r0, #0
+
+ cmp r12, #0
+
+ // Optimized Overlapped - Write 32 bytes
+ sub r10, r10, #32
+ stmia r10, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_overlapped
+ b memcopy_end
+
+memcopy_default_non_optim:
+ // Byte copy
+ ldrb r3, [r14], #1
+ sub r12, r12, #1
strb r3, [r10], #1
-L26:
- cmp ip, #0
- beq L7
-L30:
+
+memcopy_default:
+ cmp r12, #0
+ beq memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop:
cmp r0, #0
- beq L11
- sub ip, ip, #32
- cmp ip, #31
- ldmia lr!, {r2-r9}
+ beq memcopy_default_non_optim
+
+ // Optimized memcopy - Read 32 Bytes
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14!, {r2-r9}
+
+ // If the remaining length (r12) is now less than 32, disable the optimized path for the next iteration
movls r0, #0
- cmp ip, #0
+
+ cmp r12, #0
+
+ // Optimized memcopy - Write 32 Bytes
stmia r10!, {r2-r9}
- bne L30
-L7:
+
+ // while (length != 0)
+ bne memcopy_default_loop
+
+memcopy_end:
mov r0, r11
ldmfd sp!, {r4-r11, pc}
diff --git a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm
index 1a5e18e120..ca8d06a550 100755
--- a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm
+++ b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm
@@ -43,70 +43,129 @@ InternalMemCopyMem (
InternalMemCopyMem
stmfd sp!, {r4-r11, lr}
- tst r0, #3
+ // Save the input parameters in extra registers (r11 = destination, kept for the return value; r10 = working destination pointer; r14 = source; r12 = length)
mov r11, r0
mov r10, r0
- mov ip, r2
- mov lr, r1
+ mov r12, r2
+ mov r14, r1
+
+memcopy_check_overlapped
+ cmp r11, r1
+ // If (dest < source)
+ bcc memcopy_check_optim_default
+ // bls is taken when (dest <= source); (dest < source) was already handled above, so here (dest == source) and there is nothing to copy
+ bls memcopy_end
+
+ // If (source + length < dest)
+ rsb r3, r1, r11
+ cmp r12, r3
+ bcc memcopy_check_optim_default
+
+ // If (length == 0)
+ cmp r12, #0
+ beq memcopy_end
+
+ b memcopy_check_optim_overlap
+
+memcopy_check_optim_default
+ // Check if we can use the optimized path for the memcopy: (length >= 32) && destination 16-byte aligned && source 16-byte aligned (optimized path if r0 == 1)
+ tst r0, #0xF
movne r0, #0
- bne L4
- tst r1, #3
+ bne memcopy_default
+ tst r1, #0xF
movne r3, #0
moveq r3, #1
cmp r2, #31
movls r0, #0
andhi r0, r3, #1
-L4
- cmp r11, r1
- bcc L26
- bls L7
- rsb r3, r1, r11
- cmp ip, r3
- bcc L26
- cmp ip, #0
- beq L7
- add r10, r11, ip
- add lr, ip, r1
- b L16
-L29
- sub ip, ip, #8
- cmp ip, #7
- ldrd r2, [lr, #-8]!
+ b memcopy_default
+
+memcopy_check_optim_overlap
+ // r10 = dest_end, r14 = source_end
+ add r10, r11, r12
+ add r14, r12, r1
+
+ // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)?
+ cmp r2, #31
movls r0, #0
- cmp ip, #0
- strd r2, [r10, #-8]!
- beq L7
-L16
- cmp r0, #0
- bne L29
- sub r3, lr, #1
- sub ip, ip, #1
+ movhi r0, #1
+ tst r10, #0xF
+ movne r0, #0
+ tst r14, #0xF
+ movne r0, #0
+ b memcopy_overlapped
+
+memcopy_overlapped_non_optim
+ // We read 1 byte from the end of the source buffer
+ sub r3, r14, #1
+ sub r12, r12, #1
ldrb r3, [r3, #0]
sub r2, r10, #1
- cmp ip, #0
+ cmp r12, #0
+ // We write 1 byte at the end of the dest buffer
sub r10, r10, #1
- sub lr, lr, #1
+ sub r14, r14, #1
strb r3, [r2, #0]
- bne L16
- b L7
-L11
- ldrb r3, [lr], #1
- sub ip, ip, #1
+ bne memcopy_overlapped_non_optim
+ b memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped
+ // Are we in the optimized case ?
+ cmp r0, #0
+ beq memcopy_overlapped_non_optim
+
+ // Optimized Overlapped - Read 32 bytes
+ sub r14, r14, #32
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14, {r2-r9}
+
+ // If the remaining length (r12) is now less than 32, disable the optimized path for the next iteration
+ movls r0, #0
+
+ cmp r12, #0
+
+ // Optimized Overlapped - Write 32 bytes
+ sub r10, r10, #32
+ stmia r10, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_overlapped
+ b memcopy_end
+
+memcopy_default_non_optim
+ // Byte copy
+ ldrb r3, [r14], #1
+ sub r12, r12, #1
strb r3, [r10], #1
-L26
- cmp ip, #0
- beq L7
-L30
+
+memcopy_default
+ cmp r12, #0
+ beq memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop
cmp r0, #0
- beq L11
- sub ip, ip, #32
- cmp ip, #31
- ldmia lr!, {r2-r9}
+ beq memcopy_default_non_optim
+
+ // Optimized memcopy - Read 32 Bytes
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14!, {r2-r9}
+
+ // If the remaining length (r12) is now less than 32, disable the optimized path for the next iteration
movls r0, #0
- cmp ip, #0
+
+ cmp r12, #0
+
+ // Optimized memcopy - Write 32 Bytes
stmia r10!, {r2-r9}
- bne L30
-L7
+
+ // while (length != 0)
+ bne memcopy_default_loop
+
+memcopy_end
mov r0, r11
ldmfd sp!, {r4-r11, pc}