Diffstat (limited to 'Core/MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S')
-rw-r--r--  Core/MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S | 142
1 file changed, 142 insertions(+), 0 deletions(-)
diff --git a/Core/MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S b/Core/MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S
new file mode 100644
index 0000000000..a54de6948b
--- /dev/null
+++ b/Core/MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S
@@ -0,0 +1,142 @@
+//
+// Copyright (c) 2013, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the Linaro nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+// Assumptions:
+//
+// ARMv8-a, AArch64
+//
+
+
+// Parameters and result.
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result x0
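+// (AAPCS64: x0 and x1 carry the two buffer pointers, x2 the length in
+// bytes; the signed comparison result is returned in x0.)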
+
+// Internal variables.
+#define data1 x3
+#define data1w w3
+#define data2 x4
+#define data2w w4
+#define diff x6
+#define endloop x7
+#define tmp1 x8
+#define tmp2 x9
+#define pos x11
+#define limit_wd x12
+#define mask x13
+
+ .p2align 6
+ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
+ASM_PFX(InternalMemCompareMem):
+ eor tmp1, src1, src2
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ add limit_wd, limit, #7
+ lsr limit_wd, limit_wd, #3
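+ // Pointers that differ in their low three bits can never share an
+ // 8-byte alignment, so fall back to the byte loop; pointers that
+ // agree but are unaligned are first rounded down to a common
+ // boundary. limit_wd is the 8-byte word count, rounded up.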
+
+ // Start of performance-critical section -- one 64B cache line.
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 // Non-zero if differences found.
+ csinv endloop, diff, xzr, ne // Last Dword or differences.
+ cbz endloop, .Lloop_aligned
+ // End of performance-critical section -- one 64B cache line.
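+ // csinv leaves endloop equal to the difference syndrome while words
+ // remain (ne) and forces it to all-ones once the count is
+ // exhausted, so the cbz above iterates exactly while no byte has
+ // differed and words are left.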
+
+ // Not reached the limit, must have found a diff.
+ cbnz limit_wd, .Lnot_limit
+
+ // Limit % 8 == 0 => all bytes significant.
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+
+ lsl limit, limit, #3 // Bytes -> bits.
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
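+ // Example: with Length % 8 == 3, mask = ~0 << 24 =
+ // 0xFFFFFFFFFF000000; bic clears the five insignificant bytes of
+ // each data word, and or-ing the mask into diff keeps the syndrome
+ // non-zero so the clz below stays well-defined even when every
+ // significant byte matches (then data1 == data2 and the result is
+ // still zero).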
+
+.Lnot_limit:
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+
+ // The MS-non-zero bit of DIFF marks either the first bit
+ // that is different, or the end of the significant data.
+ // Shifting left now will bring the critical information into the
+ // top bits.
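+ // Example (little-endian): if the buffers first differ at byte
+ // offset 2 of the word, bytes 0-1 of diff are zero; rev moves those
+ // zero bytes into the top 16 bits, so clz lands on the first real
+ // mismatch and the shifts align that bit to bit 63 of both data
+ // words.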
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+
+ // But we need to zero-extend (char is unsigned) the value and then
+ // perform a signed 64-bit subtraction.
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+
+.Lmutual_align:
+ // Sources are mutually aligned, but are not currently at an
+ // alignment boundary. Round down the addresses and then mask off
+ // the bytes that precede the start point.
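+ // The pre-start bytes of both words are forced to 0xFF (via tmp2)
+ // so that they compare equal, and the limit is grown by the same
+ // count (tmp1) to pay for the extra bytes read before the true
+ // start.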
+ bic src1, src1, #7
+ bic src2, src2, #7
+ add limit, limit, tmp1 // Adjust the limit for the extra.
+ lsl tmp1, tmp1, #3 // Bytes beyond alignment -> bits.
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 // Bits to alignment -64.
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+
+ // Little-endian. Early bytes are at LSB.
+ lsr tmp2, tmp2, tmp1 // Shift (tmp1 & 63).
+ add limit_wd, limit, #7
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #3
+ b .Lstart_realigned
+
+ .p2align 6
+.Lmisaligned8:
+ sub limit, limit, #1
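+ // The pre-decrement makes the carry from subs clear exactly on the
+ // final byte: ccmp compares the bytes only while carry is set and
+ // otherwise forces Z clear to end the loop, so the last pair of
+ // bytes loaded always determines the result below.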
+1:
+ // Perhaps we can do better than this.
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+ ccmp data1w, data2w, #0, cs // NZCV = 0b0000.
+ b.eq 1b
+ sub result, data1, data2
+ ret
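
As a reading aid (not part of the patch), here is a minimal C model of
the word-wise comparison implemented above. It is a sketch under stated
assumptions: a little-endian AArch64 target, GCC/Clang builtins standing
in for the rev and clz instructions, and, like the assembly, the final
word read in full with the over-read bytes masked off. The name
CompareMemModel is illustrative only.

    #include <stdint.h>
    #include <string.h>

    static intptr_t
    CompareMemModel (const void *Buf1, const void *Buf2, uintptr_t Length)
    {
      const unsigned char *P1 = Buf1;
      const unsigned char *P2 = Buf2;

      for (uintptr_t Words = (Length + 7) / 8; Words != 0; Words--) {
        uint64_t Data1, Data2;
        memcpy (&Data1, P1, 8);            // One 64-bit word per step,
        memcpy (&Data2, P2, 8);            // like the ldr pair above.
        P1 += 8;
        P2 += 8;
        uint64_t Diff = Data1 ^ Data2;     // Non-zero iff the words differ.

        if (Words == 1 && (Length & 7) != 0) {
          // Mask off the bytes beyond the limit in the final word.
          uint64_t Mask = ~0ULL << ((Length & 7) * 8);
          Data1 &= ~Mask;
          Data2 &= ~Mask;
          if (Data1 == Data2) {
            return 0;
          }
          Diff |= Mask;  // Mirrors the orr in the assembly; redundant
                         // here because of the early return above.
        }
        if (Diff != 0) {
          // rev + clz in the assembly.
          int      Pos = __builtin_clzll (__builtin_bswap64 (Diff));
          uint64_t B1  = (__builtin_bswap64 (Data1) << Pos) >> 56;
          uint64_t B2  = (__builtin_bswap64 (Data2) << Pos) >> 56;
          return (intptr_t)B1 - (intptr_t)B2;
        }
      }
      return 0;
    }

Compiled for little-endian AArch64, this follows the same
word-at-a-time strategy and returns a result with the same sign as the
routine above; only the mutual-alignment and misaligned fast paths are
omitted.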