Diffstat (limited to 'MdePkg')
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S          138
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm        140
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S             172
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm           147
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S             146
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm           147
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c     142
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S               77
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm             84
-rw-r--r--  MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf    30
10 files changed, 1209 insertions, 14 deletions
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S
new file mode 100644
index 0000000000..951d15777a
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S
@@ -0,0 +1,138 @@
+//
+// Copyright (c) 2013 - 2016, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the Linaro nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+// Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+// Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+ .text
+ .thumb
+ .syntax unified
+ .align 5
+ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
+ASM_PFX(InternalMemCompareMem):
+ push {r4-r8, lr}
+ eor tmp1, src1, src2
+ tst tmp1, #3
+ bne .Lmisaligned4
+ ands tmp1, src1, #3
+ bne .Lmutual_align
+ add limit_wd, limit, #3
+ nop.w
+ lsr limit_wd, limit_wd, #2
+
+ // Start of performance-critical section -- one 32B cache line.
+.Lloop_aligned:
+ ldr data1, [src1], #4
+ ldr data2, [src2], #4
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 // Non-zero if differences found.
+ cbnz diff, 0f
+ bne .Lloop_aligned
+ // End of performance-critical section -- one 32B cache line.
+
+ // Not reached the limit, must have found a diff.
+0: cbnz limit_wd, .Lnot_limit
+
+ // Limit % 4 == 0 => all bytes significant.
+ ands limit, limit, #3
+ beq .Lnot_limit
+
+ lsl limit, limit, #3 // Bytes -> bits.
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+
+.Lnot_limit:
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+
+ // The MS-non-zero bit of DIFF marks either the first bit
+ // that is different, or the end of the significant data.
+ // Shifting left now will bring the critical information into the
+ // top bits.
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+
+ // But we need to zero-extend (char is unsigned) the value and then
+ // perform a signed 32-bit subtraction.
+ lsr data1, data1, #28
+ sub result, data1, data2, lsr #28
+ pop {r4-r8, pc}
+
+.Lmutual_align:
+ // Sources are mutually aligned, but are not currently at an
+ // alignment boundary. Round down the addresses and then mask off
+ // the bytes that precede the start point.
+ bic src1, src1, #3
+ bic src2, src2, #3
+ add limit, limit, tmp1 // Adjust the limit for the extra.
+ lsl tmp1, tmp1, #3 // Bytes beyond alignment -> bits.
+ ldr data1, [src1], #4
+ neg tmp1, tmp1 // Bits to alignment -32.
+ ldr data2, [src2], #4
+ mov tmp2, #~0
+
+ // Little-endian. Early bytes are at LSB.
+ lsr tmp2, tmp2, tmp1 // Shift (tmp1 & 31).
+ add limit_wd, limit, #3
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #2
+ b .Lstart_realigned
+
+.Lmisaligned4:
+ sub limit, limit, #1
+1:
+ // Perhaps we can do better than this.
+ ldrb data1, [src1], #1
+ ldrb data2, [src2], #1
+ subs limit, limit, #1
+ it cs
+ cmpcs data1, data2
+ beq 1b
+ sub result, data1, data2
+ pop {r4-r8, pc}
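For readers who don't want to trace the Thumb-2, the aligned fast path above is easy to model in C: compare a word at a time, then fall back to bytes to locate the difference (the assembly instead locates it with rev/clz on the XOR). A rough little-endian sketch; CompareMemModel is a hypothetical name, not part of this patch:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Hypothetical model of InternalMemCompareMem's aligned fast path.
static int
CompareMemModel (const void *s1, const void *s2, size_t limit)
{
  const unsigned char *p1 = s1;
  const unsigned char *p2 = s2;

  // Byte loop until the pointers are word aligned; mutually aligned
  // inputs become aligned at the same step.
  while (limit != 0 && ((uintptr_t)p1 & 3) != 0) {
    if (*p1 != *p2) {
      return *p1 - *p2;
    }
    p1++; p2++; limit--;
  }
  // Word loop: the XOR of two words is non-zero iff any byte differs.
  while (limit >= 4) {
    uint32_t w1, w2;
    memcpy (&w1, p1, 4);
    memcpy (&w2, p2, 4);
    if ((w1 ^ w2) != 0) {
      break;                      // locate the differing byte below
    }
    p1 += 4; p2 += 4; limit -= 4;
  }
  // Trailing bytes, or the word that differed.
  while (limit != 0) {
    if (*p1 != *p2) {
      return *p1 - *p2;
    }
    p1++; p2++; limit--;
  }
  return 0;
}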
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
new file mode 100644
index 0000000000..47b49ee164
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
@@ -0,0 +1,140 @@
+;
+; Copyright (c) 2013 - 2016, Linaro Limited
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+; * Neither the name of the Linaro nor the
+; names of its contributors may be used to endorse or promote products
+; derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+; Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+; Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+ EXPORT InternalMemCompareMem
+ AREA CompareMem, CODE, READONLY
+ THUMB
+
+InternalMemCompareMem
+ push {r4-r8, lr}
+ eor tmp1, src1, src2
+ tst tmp1, #3
+ bne Lmisaligned4
+ ands tmp1, src1, #3
+ bne Lmutual_align
+ add limit_wd, limit, #3
+ nop.w
+ lsr limit_wd, limit_wd, #2
+
+ ; Start of performance-critical section -- one 32B cache line.
+Lloop_aligned
+ ldr data1, [src1], #4
+ ldr data2, [src2], #4
+Lstart_realigned
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 ; Non-zero if differences found.
+ cbnz diff, L0
+ bne Lloop_aligned
+ ; End of performance-critical section -- one 32B cache line.
+
+ ; Not reached the limit, must have found a diff.
+L0
+ cbnz limit_wd, Lnot_limit
+
+ ; Limit % 4 == 0 => all bytes significant.
+ ands limit, limit, #3
+ beq Lnot_limit
+
+ lsl limit, limit, #3 ; Bytes -> bits.
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+
+Lnot_limit
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+
+ ; The MS-non-zero bit of DIFF marks either the first bit
+ ; that is different, or the end of the significant data.
+ ; Shifting left now will bring the critical information into the
+ ; top bits.
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+
+ ; But we need to zero-extend (char is unsigned) the value and then
+ ; perform a signed 32-bit subtraction.
+ lsr data1, data1, #28
+ sub result, data1, data2, lsr #28
+ pop {r4-r8, pc}
+
+Lmutual_align
+ ; Sources are mutually aligned, but are not currently at an
+ ; alignment boundary. Round down the addresses and then mask off
+ ; the bytes that precede the start point.
+ bic src1, src1, #3
+ bic src2, src2, #3
+ add limit, limit, tmp1 ; Adjust the limit for the extra.
+ lsl tmp1, tmp1, #3 ; Bytes beyond alignment -> bits.
+ ldr data1, [src1], #4
+ neg tmp1, tmp1 ; Bits to alignment -32.
+ ldr data2, [src2], #4
+ mov tmp2, #~0
+
+ ; Little-endian. Early bytes are at LSB.
+ lsr tmp2, tmp2, tmp1 ; Shift (tmp1 & 31).
+ add limit_wd, limit, #3
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #2
+ b Lstart_realigned
+
+Lmisaligned4
+ sub limit, limit, #1
+L1
+ ; Perhaps we can do better than this.
+ ldrb data1, [src1], #1
+ ldrb data2, [src2], #1
+ subs limit, limit, #1
+ it cs
+ cmpcs data1, data2
+ beq L1
+ sub result, data1, data2
+ pop {r4-r8, pc}
+
+ END
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
new file mode 100644
index 0000000000..fb5293befc
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
@@ -0,0 +1,172 @@
+#------------------------------------------------------------------------------
+#
+# CopyMem() worker for ARM
+#
+# This file started out as C code that did 64-bit moves if the buffer was
+# 32-bit aligned, and a byte copy otherwise, including for any trailing
+# bytes. It was updated to do 32-byte copies using ldm/stm.
+#
+# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
+# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+ .text
+ .thumb
+ .syntax unified
+
+/**
+ Copy Length bytes from Source to Destination. Overlap is OK.
+
+  This implementation copies 32 bytes at a time using ldm/stm when length
+  and alignment allow it, and falls back to a byte copy otherwise;
+  overlapping buffers are handled by copying backwards.
+
+  @param  DestinationBuffer Target of copy
+  @param  SourceBuffer      Place to copy from
+  @param  Length            Number of bytes to copy
+
+  @return DestinationBuffer
+
+
+VOID *
+EFIAPI
+InternalMemCopyMem (
+ OUT VOID *DestinationBuffer,
+ IN CONST VOID *SourceBuffer,
+ IN UINTN Length
+ )
+**/
+ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
+ASM_PFX(InternalMemCopyMem):
+ push {r4-r11, lr}
+ // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
+ mov r11, r0
+ mov r10, r0
+ mov r12, r2
+ mov r14, r1
+
+ cmp r11, r1
+ // If (dest < source)
+ bcc memcopy_check_optim_default
+
+ // If (source + length < dest)
+ rsb r3, r1, r11
+ cmp r12, r3
+ bcc memcopy_check_optim_default
+ b memcopy_check_optim_overlap
+
+memcopy_check_optim_default:
+ // Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
+ tst r0, #0xF
+ it ne
+ movne r0, #0
+ bne memcopy_default
+ tst r1, #0xF
+ ite ne
+ movne r3, #0
+ moveq r3, #1
+ cmp r2, #31
+ ite ls
+ movls r0, #0
+ andhi r0, r3, #1
+ b memcopy_default
+
+memcopy_check_optim_overlap:
+ // r10 = dest_end, r14 = source_end
+ add r10, r11, r12
+ add r14, r12, r1
+
+ // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
+ cmp r2, #31
+ ite ls
+ movls r0, #0
+ movhi r0, #1
+ tst r10, #0xF
+ it ne
+ movne r0, #0
+ tst r14, #0xF
+ it ne
+ movne r0, #0
+ b memcopy_overlapped
+
+memcopy_overlapped_non_optim:
+ // We read 1 byte from the end of the source buffer
+ sub r3, r14, #1
+ sub r12, r12, #1
+ ldrb r3, [r3, #0]
+ sub r2, r10, #1
+ cmp r12, #0
+ // We write 1 byte at the end of the dest buffer
+ sub r10, r10, #1
+ sub r14, r14, #1
+ strb r3, [r2, #0]
+ bne memcopy_overlapped_non_optim
+ b memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped:
+ // Are we in the optimized case ?
+ cmp r0, #0
+ beq memcopy_overlapped_non_optim
+
+ // Optimized Overlapped - Read 32 bytes
+ sub r14, r14, #32
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14, {r2-r9}
+
+ // If length is less than 32 then disable optim
+ it ls
+ movls r0, #0
+
+ cmp r12, #0
+
+ // Optimized Overlapped - Write 32 bytes
+ sub r10, r10, #32
+ stmia r10, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_overlapped
+ b memcopy_end
+
+memcopy_default_non_optim:
+ // Byte copy
+ ldrb r3, [r14], #1
+ sub r12, r12, #1
+ strb r3, [r10], #1
+
+memcopy_default:
+ cmp r12, #0
+ beq memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop:
+ cmp r0, #0
+ beq memcopy_default_non_optim
+
+ // Optimized memcopy - Read 32 Bytes
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14!, {r2-r9}
+
+ // If length is less than 32 then disable optim
+ it ls
+ movls r0, #0
+
+ cmp r12, #0
+
+ // Optimized memcopy - Write 32 Bytes
+ stmia r10!, {r2-r9}
+
+ // while (length != 0)
+ bne memcopy_default_loop
+
+memcopy_end:
+ mov r0, r11
+ pop {r4-r11, pc}
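Stripped of the 32-byte ldm/stm fast path, the control flow above reduces to one rule: copy backwards only when the destination starts inside the source region. A hypothetical C model (CopyMemModel is not part of this patch):

#include <stddef.h>

// Hypothetical model of InternalMemCopyMem's direction choice.
static void *
CopyMemModel (void *dst, const void *src, size_t len)
{
  unsigned char       *d = dst;
  const unsigned char *s = src;

  if (d > s && (size_t)(d - s) < len) {
    // Forward copying would clobber source bytes before reading them,
    // so copy from the end.  The real routine moves 32 bytes per
    // ldm/stm pair when both end pointers are 16-byte aligned and
    // len >= 32.
    while (len != 0) {
      len--;
      d[len] = s[len];
    }
  } else {
    // Forward copy; the same 32-byte fast path applies when both start
    // pointers are 16-byte aligned and len >= 32.
    while (len != 0) {
      *d++ = *s++;
      len--;
    }
  }
  return dst;
}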
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
new file mode 100644
index 0000000000..2034807954
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
@@ -0,0 +1,147 @@
+;------------------------------------------------------------------------------
+;
+; CopyMem() worker for ARM
+;
+; This file started out as C code that did 64-bit moves if the buffer was
+; 32-bit aligned, and a byte copy otherwise, including for any trailing
+; bytes. It was updated to do 32-byte copies using ldm/stm.
+;
+; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
+; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+;------------------------------------------------------------------------------
+
+ EXPORT InternalMemCopyMem
+ AREA CopyMem, CODE, READONLY
+ THUMB
+
+InternalMemCopyMem
+ stmfd sp!, {r4-r11, lr}
+ ; Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
+ mov r11, r0
+ mov r10, r0
+ mov r12, r2
+ mov r14, r1
+
+memcopy_check_overlapped
+ cmp r11, r1
+ ; If (dest < source)
+ bcc memcopy_check_optim_default
+
+ ; If (source + length < dest)
+ rsb r3, r1, r11
+ cmp r12, r3
+ bcc memcopy_check_optim_default
+ b memcopy_check_optim_overlap
+
+memcopy_check_optim_default
+ ; Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
+ tst r0, #0xF
+ movne r0, #0
+ bne memcopy_default
+ tst r1, #0xF
+ movne r3, #0
+ moveq r3, #1
+ cmp r2, #31
+ movls r0, #0
+ andhi r0, r3, #1
+ b memcopy_default
+
+memcopy_check_optim_overlap
+ ; r10 = dest_end, r14 = source_end
+ add r10, r11, r12
+ add r14, r12, r1
+
+ ; Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
+ cmp r2, #31
+ movls r0, #0
+ movhi r0, #1
+ tst r10, #0xF
+ movne r0, #0
+ tst r14, #0xF
+ movne r0, #0
+ b memcopy_overlapped
+
+memcopy_overlapped_non_optim
+ ; We read 1 byte from the end of the source buffer
+ sub r3, r14, #1
+ sub r12, r12, #1
+ ldrb r3, [r3, #0]
+ sub r2, r10, #1
+ cmp r12, #0
+ ; We write 1 byte at the end of the dest buffer
+ sub r10, r10, #1
+ sub r14, r14, #1
+ strb r3, [r2, #0]
+ bne memcopy_overlapped_non_optim
+ b memcopy_end
+
+; r10 = dest_end, r14 = source_end
+memcopy_overlapped
+ ; Are we in the optimized case ?
+ cmp r0, #0
+ beq memcopy_overlapped_non_optim
+
+ ; Optimized Overlapped - Read 32 bytes
+ sub r14, r14, #32
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14, {r2-r9}
+
+ ; If length is less than 32 then disable optim
+ movls r0, #0
+
+ cmp r12, #0
+
+ ; Optimized Overlapped - Write 32 bytes
+ sub r10, r10, #32
+ stmia r10, {r2-r9}
+
+ ; while (length != 0)
+ bne memcopy_overlapped
+ b memcopy_end
+
+memcopy_default_non_optim
+ ; Byte copy
+ ldrb r3, [r14], #1
+ sub r12, r12, #1
+ strb r3, [r10], #1
+
+memcopy_default
+ cmp r12, #0
+ beq memcopy_end
+
+; r10 = dest, r14 = source
+memcopy_default_loop
+ cmp r0, #0
+ beq memcopy_default_non_optim
+
+ ; Optimized memcopy - Read 32 Bytes
+ sub r12, r12, #32
+ cmp r12, #31
+ ldmia r14!, {r2-r9}
+
+ ; If length is less than 32 then disable optim
+ movls r0, #0
+
+ cmp r12, #0
+
+ ; Optimized memcopy - Write 32 Bytes
+ stmia r10!, {r2-r9}
+
+ ; while (length != 0)
+ bne memcopy_default_loop
+
+memcopy_end
+ mov r0, r11
+ ldmfd sp!, {r4-r11, pc}
+
+ END
+
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S
new file mode 100644
index 0000000000..dc0e74e865
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S
@@ -0,0 +1,146 @@
+// Copyright (c) 2010-2011, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Linaro Limited nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// Written by Dave Gilbert <david.gilbert@linaro.org>
+//
+// This memchr routine is optimised on a Cortex-A9 and should work on
+// all ARMv7 processors. It has a fast path for short sizes, and has
+// an optimised path for large data sets; the worst case is finding the
+// match early in a large data set.
+//
+
+
+// 2011-02-07 david.gilbert@linaro.org
+// Extracted from local git a5b438d861
+// 2011-07-14 david.gilbert@linaro.org
+// Import endianness fix from local git ea786f1b
+// 2011-12-07 david.gilbert@linaro.org
+// Removed unneeded cbz from align loop
+
+// this lets us check a flag in a 00/ff byte easily in either endianness
+#define CHARTSTMASK(c) 1<<(c*8)
+
+ .text
+ .thumb
+ .syntax unified
+
+ .type ASM_PFX(InternalMemScanMem8), %function
+ASM_GLOBAL ASM_PFX(InternalMemScanMem8)
+ASM_PFX(InternalMemScanMem8):
+ // r0 = start of memory to scan
+ // r1 = length
+ // r2 = character to look for
+ // returns r0 = pointer to character or NULL if not found
+ uxtb r2, r2 // Don't think we can trust the caller to actually pass a char
+
+ cmp r1, #16 // If it's short don't bother with anything clever
+ blt 20f
+
+ tst r0, #7 // If it's already aligned skip the next bit
+ beq 10f
+
+ // Work up to an aligned point
+5:
+ ldrb r3, [r0],#1
+ subs r1, r1, #1
+ cmp r3, r2
+ beq 50f // If it matches exit found
+ tst r0, #7
+ bne 5b // If not aligned yet then do next byte
+
+10:
+ // At this point, we are aligned, we know we have at least 8 bytes to work with
+ push {r4-r7}
+ orr r2, r2, r2, lsl #8 // expand the match word across to all bytes
+ orr r2, r2, r2, lsl #16
+ bic r4, r1, #7 // Number of double words to work with
+ mvns r7, #0 // all F's
+ movs r3, #0
+
+15:
+ ldmia r0!, {r5,r6}
+ subs r4, r4, #8
+ eor r5, r5, r2 // Get it so that r5,r6 have 00's where the bytes match the target
+ eor r6, r6, r2
+ uadd8 r5, r5, r7 // Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r5, r3, r7 // bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ uadd8 r6, r6, r7 // Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r6, r5, r7 // chained... bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ cbnz r6, 60f
+ bne 15b // (Flags from the subs above) If not run out of bytes then go around again
+
+ pop {r4-r7}
+ and r2, r2, #0xff // Get r2 back to a single character from the expansion above
+ and r1, r1, #7 // Leave the count remaining as the number after the double words have been done
+
+20:
+ cbz r1, 40f // 0 length or hit the end already then not found
+
+21: // Post aligned section, or just a short call
+ ldrb r3, [r0], #1
+ subs r1, r1, #1
+ eor r3, r3, r2 // r3 = 0 if match - doesn't break flags from sub
+ cbz r3, 50f
+ bne 21b // on r1 flags
+
+40:
+ movs r0, #0 // not found
+ bx lr
+
+50:
+ subs r0, r0, #1 // found
+ bx lr
+
+60: // We're here because the fast path found a hit - now we have to track down exactly which word it was
+ // r0 points to the start of the double word after the one that was tested
+ // r5 has the 00/ff pattern for the first word, r6 has the chained value
+ cmp r5, #0
+ itte eq
+ moveq r5, r6 // the end is in the 2nd word
+ subeq r0, r0, #3 // Points to 2nd byte of 2nd word
+ subne r0, r0, #7 // or 2nd byte of 1st word
+
+ // r0 currently points to the 3rd byte of the word containing the hit
+ tst r5, #CHARTSTMASK(0) // 1st character
+ bne 61f
+ adds r0, r0, #1
+ tst r5, #CHARTSTMASK(1) // 2nd character
+ ittt eq
+ addeq r0, r0 ,#1
+ tsteq r5, #(3 << 15) // 2nd & 3rd character
+ // If not the 3rd must be the last one
+ addeq r0, r0, #1
+
+61:
+ pop {r4-r7}
+ subs r0, r0, #1
+ bx lr
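The UADD8/SEL pair above has no portable C spelling, but the classic zero-in-word trick answers the same question: which byte of this word, if any, matches the target. A hypothetical sketch (FirstMatchingByte is not part of this patch; little-endian assumed):

#include <stddef.h>
#include <stdint.h>

// Returns the index (0-3) of the first byte of word equal to value,
// or 4 if no byte matches.
static size_t
FirstMatchingByte (uint32_t word, uint8_t value)
{
  uint32_t pattern = value * 0x01010101u;  // broadcast, like the two ORRs
  uint32_t x       = word ^ pattern;       // 0x00 in every matching byte
  // Classic zero-byte detector: sets bit 7/15/23/31 for each zero byte
  // of x.  Flags above the first zero byte can be spurious (borrow
  // propagation), so scan from the least significant byte.
  uint32_t t       = (x - 0x01010101u) & ~x & 0x80808080u;
  size_t   i;

  if (t == 0) {
    return 4;
  }
  for (i = 0; (t & 0x80u) == 0; i++) {
    t >>= 8;
  }
  return i;
}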
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm
new file mode 100644
index 0000000000..abda87320e
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm
@@ -0,0 +1,147 @@
+; Copyright (c) 2010-2011, Linaro Limited
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; * Neither the name of Linaro Limited nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+;
+; Written by Dave Gilbert <david.gilbert@linaro.org>
+;
+; This memchr routine is optimised on a Cortex-A9 and should work on
+; all ARMv7 processors. It has a fast path for short sizes, and has
+; an optimised path for large data sets; the worst case is finding the
+; match early in a large data set.
+;
+
+
+; 2011-02-07 david.gilbert@linaro.org
+; Extracted from local git a5b438d861
+; 2011-07-14 david.gilbert@linaro.org
+; Import endianness fix from local git ea786f1b
+; 2011-12-07 david.gilbert@linaro.org
+; Removed unneeded cbz from align loop
+
+; this lets us check a flag in a 00/ff byte easily in either endianness
+#define CHARTSTMASK(c) 1<<(c*8)
+
+ EXPORT InternalMemScanMem8
+ AREA ScanMem, CODE, READONLY
+ THUMB
+
+InternalMemScanMem8
+ ; r0 = start of memory to scan
+ ; r1 = length
+ ; r2 = character to look for
+ ; returns r0 = pointer to character or NULL if not found
+ uxtb r2, r2 ; Don't think we can trust the caller to actually pass a char
+
+ cmp r1, #16 ; If it's short don't bother with anything clever
+ blt L20
+
+ tst r0, #7 ; If it's already aligned skip the next bit
+ beq L10
+
+ ; Work up to an aligned point
+L5
+ ldrb r3, [r0],#1
+ subs r1, r1, #1
+ cmp r3, r2
+ beq L50 ; If it matches exit found
+ tst r0, #7
+ bne L5 ; If not aligned yet then do next byte
+
+L10
+ ; At this point, we are aligned, we know we have at least 8 bytes to work with
+ push {r4-r7}
+ orr r2, r2, r2, lsl #8 ; expand the match word across to all bytes
+ orr r2, r2, r2, lsl #16
+ bic r4, r1, #7 ; Number of double words to work with
+ mvns r7, #0 ; all F's
+ movs r3, #0
+
+L15
+ ldmia r0!, {r5,r6}
+ subs r4, r4, #8
+ eor r5, r5, r2 ; Get it so that r5,r6 have 00's where the bytes match the target
+ eor r6, r6, r2
+ uadd8 r5, r5, r7 ; Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r5, r3, r7 ; bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ uadd8 r6, r6, r7 ; Parallel add 0xff - sets the GE bits for anything that wasn't 0
+ sel r6, r5, r7 ; chained... bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+ cbnz r6, L60
+ bne L15 ; (Flags from the subs above) If not run out of bytes then go around again
+
+ pop {r4-r7}
+ and r2, r2, #0xff ; Get r2 back to a single character from the expansion above
+ and r1, r1, #7 ; Leave the count remaining as the number after the double words have been done
+
+L20
+ cbz r1, L40 ; 0 length or hit the end already then not found
+
+L21 ; Post aligned section, or just a short call
+ ldrb r3, [r0], #1
+ subs r1, r1, #1
+ eor r3, r3, r2 ; r3 = 0 if match - doesn't break flags from sub
+ cbz r3, L50
+ bne L21 ; on r1 flags
+
+L40
+ movs r0, #0 ; not found
+ bx lr
+
+L50
+ subs r0, r0, #1 ; found
+ bx lr
+
+L60 ; We're here because the fast path found a hit - now we have to track down exactly which word it was
+ ; r0 points to the start of the double word after the one that was tested
+ ; r5 has the 00/ff pattern for the first word, r6 has the chained value
+ cmp r5, #0
+ itte eq
+ moveq r5, r6 ; the end is in the 2nd word
+ subeq r0, r0, #3 ; Points to 2nd byte of 2nd word
+ subne r0, r0, #7 ; or 2nd byte of 1st word
+
+ ; r0 currently points to the 3rd byte of the word containing the hit
+ tst r5, #CHARTSTMASK(0) ; 1st character
+ bne L61
+ adds r0, r0, #1
+ tst r5, #CHARTSTMASK(1) ; 2nd character
+ ittt eq
+ addeq r0, r0 ,#1
+ tsteq r5, #(3 << 15) ; 2nd & 3rd character
+ ; If not the 3rd must be the last one
+ addeq r0, r0, #1
+
+L61
+ pop {r4-r7}
+ subs r0, r0, #1
+ bx lr
+
+ END
+
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c
new file mode 100644
index 0000000000..20fa7e9be6
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c
@@ -0,0 +1,142 @@
+/** @file
+ Architecture Independent Base Memory Library Implementation.
+
+ The following BaseMemoryLib instances contain the same copy of this file:
+ BaseMemoryLib
+ PeiMemoryLib
+ UefiMemoryLib
+
+ Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
+ This program and the accompanying materials
+ are licensed and made available under the terms and conditions of the BSD License
+ which accompanies this distribution. The full text of the license may be found at
+ http://opensource.org/licenses/bsd-license.php.
+
+ THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+ WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#include "../MemLibInternals.h"
+
+/**
+ Scans a target buffer for a 16-bit value, and returns a pointer to the
+ matching 16-bit value in the target buffer.
+
+ @param Buffer The pointer to the target buffer to scan.
+ @param Length The count of 16-bit values to scan. Must be non-zero.
+ @param Value The value to search for in the target buffer.
+
+ @return The pointer to the first occurrence, or NULL if not found.
+
+**/
+CONST VOID *
+EFIAPI
+InternalMemScanMem16 (
+ IN CONST VOID *Buffer,
+ IN UINTN Length,
+ IN UINT16 Value
+ )
+{
+ CONST UINT16 *Pointer;
+
+ Pointer = (CONST UINT16*)Buffer;
+ do {
+ if (*Pointer == Value) {
+ return Pointer;
+ }
+ ++Pointer;
+ } while (--Length != 0);
+ return NULL;
+}
+
+/**
+ Scans a target buffer for a 32-bit value, and returns a pointer to the
+ matching 32-bit value in the target buffer.
+
+ @param Buffer The pointer to the target buffer to scan.
+ @param Length The count of 32-bit values to scan. Must be non-zero.
+ @param Value The value to search for in the target buffer.
+
+ @return The pointer to the first occurrence, or NULL if not found.
+
+**/
+CONST VOID *
+EFIAPI
+InternalMemScanMem32 (
+ IN CONST VOID *Buffer,
+ IN UINTN Length,
+ IN UINT32 Value
+ )
+{
+ CONST UINT32 *Pointer;
+
+ Pointer = (CONST UINT32*)Buffer;
+ do {
+ if (*Pointer == Value) {
+ return Pointer;
+ }
+ ++Pointer;
+ } while (--Length != 0);
+ return NULL;
+}
+
+/**
+ Scans a target buffer for a 64-bit value, and returns a pointer to the
+ matching 64-bit value in the target buffer.
+
+ @param Buffer The pointer to the target buffer to scan.
+ @param Length The count of 64-bit values to scan. Must be non-zero.
+ @param Value The value to search for in the target buffer.
+
+ @return The pointer to the first occurrence, or NULL if not found.
+
+**/
+CONST VOID *
+EFIAPI
+InternalMemScanMem64 (
+ IN CONST VOID *Buffer,
+ IN UINTN Length,
+ IN UINT64 Value
+ )
+{
+ CONST UINT64 *Pointer;
+
+ Pointer = (CONST UINT64*)Buffer;
+ do {
+ if (*Pointer == Value) {
+ return Pointer;
+ }
+ ++Pointer;
+ } while (--Length != 0);
+ return NULL;
+}
+
+/**
+ Checks whether the contents of a buffer are all zeros.
+
+ @param Buffer The pointer to the buffer to be checked.
+ @param Length The size of the buffer (in bytes) to be checked.
+
+ @retval TRUE Contents of the buffer are all zeros.
+ @retval FALSE Contents of the buffer are not all zeros.
+
+**/
+BOOLEAN
+EFIAPI
+InternalMemIsZeroBuffer (
+ IN CONST VOID *Buffer,
+ IN UINTN Length
+ )
+{
+ CONST UINT8 *BufferData;
+ UINTN Index;
+
+ BufferData = Buffer;
+ for (Index = 0; Index < Length; Index++) {
+ if (BufferData[Index] != 0) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
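These workers are reached through the public BaseMemoryLib wrappers, which validate their arguments and take a length in bytes, converting it to the element count the workers expect. A hypothetical caller (FindPattern is illustrative only, not part of this patch):

#include <Base.h>
#include <Library/BaseMemoryLib.h>

STATIC CONST UINT16  mBuffer[] = { 0x1234, 0x5678, 0xABCD };

VOID *
FindPattern (
  VOID
  )
{
  //
  // Length is in bytes for the public ScanMem16(); the wrapper divides
  // it by two before calling InternalMemScanMem16() above.
  //
  return ScanMem16 (mBuffer, sizeof (mBuffer), 0xABCD);
}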
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S
new file mode 100644
index 0000000000..c1755539d3
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S
@@ -0,0 +1,77 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+#
+# This program and the accompanying materials are licensed and made available
+# under the terms and conditions of the BSD License which accompanies this
+# distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+ .text
+ .thumb
+ .syntax unified
+ .align 5
+ASM_GLOBAL ASM_PFX(InternalMemZeroMem)
+ASM_PFX(InternalMemZeroMem):
+ movs r2, #0
+
+ASM_GLOBAL ASM_PFX(InternalMemSetMem)
+ASM_PFX(InternalMemSetMem):
+ uxtb r2, r2
+ orr r2, r2, r2, lsl #8
+
+ASM_GLOBAL ASM_PFX(InternalMemSetMem16)
+ASM_PFX(InternalMemSetMem16):
+ uxth r2, r2
+ orr r2, r2, r2, lsl #16
+
+ASM_GLOBAL ASM_PFX(InternalMemSetMem32)
+ASM_PFX(InternalMemSetMem32):
+ mov r3, r2
+
+ASM_GLOBAL ASM_PFX(InternalMemSetMem64)
+ASM_PFX(InternalMemSetMem64):
+ push {r4, lr}
+ cmp r1, #16 // fewer than 16 bytes of input?
+ add r1, r1, r0 // r1 := dst + length
+ add lr, r0, #16
+ blt 2f
+ bic lr, lr, #15 // align output pointer
+
+ str r2, [r0] // potentially unaligned store of 4 bytes
+ str r3, [r0, #4] // potentially unaligned store of 4 bytes
+ str r2, [r0, #8] // potentially unaligned store of 4 bytes
+ str r3, [r0, #12] // potentially unaligned store of 4 bytes
+ beq 1f
+
+0: add lr, lr, #16 // advance the output pointer by 16 bytes
+ subs r4, r1, lr // past the output?
+ blt 3f // break out of the loop
+ strd r2, r3, [lr, #-16] // aligned store of 16 bytes
+ strd r2, r3, [lr, #-8]
+ bne 0b // goto beginning of loop
+1: pop {r4, pc}
+
+2: subs r4, r1, lr
+3: adds r4, r4, #16
+ subs r1, r1, #8
+ cmp r4, #4 // between 4 and 15 bytes?
+ blt 4f
+ cmp r4, #8 // between 8 and 15 bytes?
+ str r2, [lr, #-16] // overlapping store of 4 + (4 + 4) + 4 bytes
+ itt gt
+ strgt r3, [lr, #-12]
+ strgt r2, [r1]
+ str r3, [r1, #4]
+ pop {r4, pc}
+
+4: cmp r4, #2 // 2 or 3 bytes?
+ strb r2, [lr, #-16] // store 1 byte
+ it ge
+ strhge r2, [r1, #6] // store 2 bytes
+ pop {r4, pc}
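The store pattern above needs no byte loop for lengths of 16 or more: one potentially unaligned 16-byte head store, aligned 16-byte stores through the middle, and one overlapping store that ends exactly at the last byte. A rough C model of the byte-fill entry (SetMemModel is hypothetical, not part of this patch):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Byte fills are phase-free, so overlapping stores cannot corrupt the
// pattern -- the trick that makes the tail store above safe.
static void
SetMemModel (void *dst, size_t len, unsigned char value)
{
  unsigned char *p   = dst;
  unsigned char *end = p + len;
  uint64_t      pattern;

  memset (&pattern, value, sizeof (pattern));  // broadcast, like the ORRs

  if (len >= 16) {
    memcpy (p, &pattern, 8);                   // unaligned head store
    memcpy (p + 8, &pattern, 8);
    // Round up to a 16-byte boundary (add lr, r0, #16 / bic lr, lr, #15).
    p = (unsigned char *)(((uintptr_t)p + 16) & ~(uintptr_t)15);
    while ((size_t)(end - p) >= 16) {          // the strd-pair loop
      memcpy (p, &pattern, 8);
      memcpy (p + 8, &pattern, 8);
      p += 16;
    }
    // Overlapping tail store ending exactly at the buffer's last byte.
    memcpy (end - 16, &pattern, 8);
    memcpy (end - 8, &pattern, 8);
  } else {
    // Below 16 bytes the assembly uses overlapping 4-, 2- and 1-byte
    // stores; a plain byte loop is the simplest faithful model.
    while (p < end) {
      *p++ = value;
    }
  }
}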
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm
new file mode 100644
index 0000000000..2a8dc7d019
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm
@@ -0,0 +1,84 @@
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+;
+; This program and the accompanying materials are licensed and made available
+; under the terms and conditions of the BSD License which accompanies this
+; distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+;------------------------------------------------------------------------------
+
+ EXPORT InternalMemZeroMem
+ EXPORT InternalMemSetMem
+ EXPORT InternalMemSetMem16
+ EXPORT InternalMemSetMem32
+ EXPORT InternalMemSetMem64
+
+ AREA SetMem, CODE, READONLY, CODEALIGN, ALIGN=5
+ THUMB
+
+InternalMemZeroMem
+ movs r2, #0
+
+InternalMemSetMem
+ uxtb r2, r2
+ orr r2, r2, r2, lsl #8
+
+InternalMemSetMem16
+ uxth r2, r2
+ orr r2, r2, r2, lsl #16
+
+InternalMemSetMem32
+ mov r3, r2
+
+InternalMemSetMem64
+ push {r4, lr}
+ cmp r1, #16 ; fewer than 16 bytes of input?
+ add r1, r1, r0 ; r1 := dst + length
+ add lr, r0, #16
+ blt L2
+ bic lr, lr, #15 ; align output pointer
+
+ str r2, [r0] ; potentially unaligned store of 4 bytes
+ str r3, [r0, #4] ; potentially unaligned store of 4 bytes
+ str r2, [r0, #8] ; potentially unaligned store of 4 bytes
+ str r3, [r0, #12] ; potentially unaligned store of 4 bytes
+ beq L1
+
+L0
+ add lr, lr, #16 ; advance the output pointer by 16 bytes
+ subs r4, r1, lr ; past the output?
+ blt L3 ; break out of the loop
+ strd r2, r3, [lr, #-16] ; aligned store of 16 bytes
+ strd r2, r3, [lr, #-8]
+ bne L0 ; goto beginning of loop
+L1
+ pop {r4, pc}
+
+L2
+ subs r4, r1, lr
+L3
+ adds r4, r4, #16
+ subs r1, r1, #8
+ cmp r4, #4 ; between 4 and 15 bytes?
+ blt L4
+ cmp r4, #8 ; between 8 and 15 bytes?
+ str r2, [lr, #-16] ; overlapping store of 4 + (4 + 4) + 4 bytes
+ itt gt
+ strgt r3, [lr, #-12]
+ strgt r2, [r1]
+ str r3, [r1, #4]
+ pop {r4, pc}
+
+L4
+ cmp r4, #2 ; 2 or 3 bytes?
+ strb r2, [lr, #-16] ; store 1 byte
+ it ge
+ strhge r2, [r1, #6] ; store 2 bytes
+ pop {r4, pc}
+
+ END
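The INF changes below register these sources per toolchain family (|GCC for the .S files, |RVCT for the .asm files). A platform opts in by mapping the BaseMemoryLib class to this instance in its DSC; a hypothetical platform snippet, not part of this patch:

[LibraryClasses.common.DXE_DRIVER]
  BaseMemoryLib|MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf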
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf b/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
index 71691b9859..d95eb599ea 100644
--- a/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
@@ -27,7 +27,7 @@
#
-# VALID_ARCHITECTURES = IA32 X64
+# VALID_ARCHITECTURES = IA32 X64 ARM
#
[Sources]
@@ -79,19 +79,6 @@
Ia32/CopyMem.nasm
Ia32/CopyMem.asm
Ia32/IsZeroBuffer.nasm
- ScanMem64Wrapper.c
- ScanMem32Wrapper.c
- ScanMem16Wrapper.c
- ScanMem8Wrapper.c
- ZeroMemWrapper.c
- CompareMemWrapper.c
- SetMem64Wrapper.c
- SetMem32Wrapper.c
- SetMem16Wrapper.c
- SetMemWrapper.c
- CopyMemWrapper.c
- IsZeroBufferWrapper.c
- MemLibGuid.c
[Sources.X64]
X64/ScanMem64.nasm
@@ -128,6 +115,21 @@
X64/CopyMem.asm
X64/CopyMem.S
X64/IsZeroBuffer.nasm
+
+[Sources.ARM]
+ Arm/ScanMem.S |GCC
+ Arm/SetMem.S |GCC
+ Arm/CopyMem.S |GCC
+ Arm/CompareMem.S |GCC
+
+ Arm/ScanMem.asm |RVCT
+ Arm/SetMem.asm |RVCT
+ Arm/CopyMem.asm |RVCT
+ Arm/CompareMem.asm |RVCT
+
+ Arm/ScanMemGeneric.c
+
+[Sources]
ScanMem64Wrapper.c
ScanMem32Wrapper.c
ScanMem16Wrapper.c