summaryrefslogtreecommitdiff
path: root/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
blob: 195a0b23f770a30a6dbb2fcdb83d1d5ef55c9d39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#------------------------------------------------------------------------------
#
# CopyMem() worker for ARM
#
# This file started out as C code that did 64 bit moves if the buffer was
# 32-bit aligned, else it does a byte copy. It also does a byte copy for
# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
#
# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution.  The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------

    .text
    .thumb
    .syntax unified

/**
  Copy Length bytes from Source to Destination. Overlap is OK.

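  This implementation moves 32 bytes per iteration with ldm/stm when Length is
  at least 32 and both buffers pass a 16-byte alignment check, and falls back
  to a byte-by-byte copy otherwise. When Destination lies at or above Source
  and within Length bytes of it, the copy runs backwards from the end of the
  buffers so that overlapping regions are handled correctly.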

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination


VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    // In:  r0 = DestinationBuffer, r1 = SourceBuffer, r2 = Length
    // Out: r0 = DestinationBuffer
    //
    // Register roles after the prologue:
    //   r11   = original destination (reloaded into r0 on return)
    //   r10   = destination cursor (start for forward copy, end for backward copy)
    //   r14   = source cursor      (start for forward copy, end for backward copy)
    //   r12   = number of bytes still to copy
    //   r0    = 1 while 32-byte ldm/stm chunks may be used, 0 for byte copy
    //   r2-r9 = data in flight (r2/r3 double as scratch in the byte loops)

    // Nothing to copy: return Destination (already in r0) right away.
    // Without this guard, Length == 0 together with Destination == Source
    // reaches the backward byte loop, which decrements the remaining count
    // before testing it; the count would wrap to 0xFFFFFFFF and the loop
    // would run away through memory.
    cmp     r2, #0
    it      eq
    bxeq    lr

    // lr is saved because r14 is reused as the source cursor; the matching
    // pop at memcopy_end loads it straight into pc.
    push    {r4-r11, lr}
    // Save the input parameters in extra registers
    // (r11 = destination, r10 = destination cursor, r14 = source, r12 = length)
    mov     r11, r0
    mov     r10, r0
    mov     r12, r2
    mov     r14, r1

    cmp     r11, r1
    // If (dest < source) a forward copy is always safe
    bcc     memcopy_check_optim_default

    // dest >= source here. If (length < dest - source) the buffers do not
    // overlap and a forward copy is safe; otherwise copy backwards.
    rsb     r3, r1, r11                 // r3 = dest - source
    cmp     r12, r3
    bcc     memcopy_check_optim_default
    b       memcopy_check_optim_overlap

memcopy_check_optim_default:
    // Check if we can use the optimized path for the forward copy:
    // (length >= 32) && destination 16-byte aligned && source 16-byte aligned.
    // On exit r0 == 1 selects the optimized path, r0 == 0 the byte copy.
    // The narrow mov inside an IT block does not update the flags, so the
    // conditional branches below still see the result of the preceding test.
    tst     r0, #0xF
    it      ne
    movne.n r0, #0
    bne     memcopy_default
    tst     r1, #0xF
    it      ne
    movne.n r3, #0                      // r3 = 0: source not aligned
    it      eq
    moveq.n r3, #1                      // r3 = 1: source aligned
    cmp     r2, #31
    it      ls
    movls.n r0, #0
    bls     memcopy_default
    and     r0, r3, #1
    b       memcopy_default

memcopy_check_optim_overlap:
    // Backward copy: r10 = dest_end, r14 = source_end
    add     r10, r11, r12
    add     r14, r12, r1

    // Are we in the optimized case:
    // (length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned
    cmp     r2, #31
    it      ls
    movls.n r0, #0
    it      hi
    movhi.n r0, #1
    tst     r10, #0xF
    it      ne
    movne.n r0, #0
    tst     r14, #0xF
    it      ne
    movne.n r0, #0
    b       memcopy_overlapped

memcopy_overlapped_non_optim:
    // Backward byte copy. Requires r12 != 0 on entry: the count is
    // decremented before it is tested.
    // We read 1 byte from the end of the source buffer
    sub     r3, r14, #1
    sub     r12, r12, #1
    ldrb    r3, [r3, #0]
    sub     r2, r10, #1
    // Flags set here are consumed by the bne below; the intervening sub/strb
    // instructions do not modify them.
    cmp     r12, #0
    // We write 1 byte at the end of the dest buffer
    sub     r10, r10, #1
    sub     r14, r14, #1
    strb    r3, [r2, #0]
    bne     memcopy_overlapped_non_optim
    b       memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped:
    // Are we in the optimized case ?
    cmp     r0, #0
    beq     memcopy_overlapped_non_optim

    // Optimized Overlapped - Read 32 bytes
    sub     r14, r14, #32
    sub     r12, r12, #32
    cmp     r12, #31
    ldmia   r14, {r2-r9}

    // If the remaining length is less than 32 then disable optim
    it      ls
    movls.n r0, #0

    cmp     r12, #0

    // Optimized Overlapped - Write 32 bytes (all 32 bytes were loaded before
    // any of them is stored, so the chunk itself may overlap)
    sub     r10, r10, #32
    stmia   r10, {r2-r9}

    // while (length != 0)
    bne     memcopy_overlapped
    b       memcopy_end

memcopy_default_non_optim:
    // Forward byte copy, falls through into the length test below
    ldrb    r3, [r14], #1
    sub     r12, r12, #1
    strb    r3, [r10], #1

memcopy_default:
    cmp     r12, #0
    beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop:
    cmp     r0, #0
    beq     memcopy_default_non_optim

    // Optimized memcopy - Read 32 Bytes
    sub     r12, r12, #32
    cmp     r12, #31
    ldmia   r14!, {r2-r9}

    // If the remaining length is less than 32 then disable optim
    it      ls
    movls.n r0, #0

    cmp     r12, #0

    // Optimized memcopy - Write 32 Bytes
    stmia   r10!, {r2-r9}

    // while (length != 0)
    bne     memcopy_default_loop

memcopy_end:
    // Return the original destination pointer
    mov     r0, r11
    pop     {r4-r11, pc}