1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
/*++
Copyright (c) 2006, Intel Corporation
All rights reserved. This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
Module Name:
EfiZeroMemSSE2.c
Abstract:
This is the code that supports IA32-optimized ZeroMem service
--*/
#include "Tiano.h"
VOID
EfiCommonLibZeroMem (
  IN VOID   *Buffer,
  IN UINTN  Count
  )
/*++

Input:    VOID   *Buffer - Pointer to buffer to clear
          UINTN  Count   - Number of bytes to clear

Output:   None.

Saves:    Nothing is saved explicitly by this code.

Modifies: eax, ecx, edx, edi, xmm1 and the arithmetic flags.
          NOTE(review): edi is written without an explicit push/pop;
          presumably the compiler preserves it around the __asm block -
          confirm for the toolchain in use.

Description:  This function is an optimized zero-memory function for IA32
              that uses 16-byte SSE2 stores (movdqu) for the bulk of the
              buffer.

Notes:  The buffer is cleared in three phases:
          1. Single bytes are cleared until the destination pointer is
             4-byte aligned (or Count is exhausted).
          2. Whole 64-byte blocks are cleared with four unaligned 16-byte
             stores per iteration.
          3. The remaining tail (fewer than 64 bytes) is cleared with
             rep stosd for whole DWORDs and rep stosb for the last 0-3 bytes.
        The rep stos instructions store forward only when the direction
        flag is clear; no CLD is issued here, so the caller's DF=0 state is
        assumed.

--*/
{
  __asm {
    mov     ecx, Count                  ; ecx = bytes still to clear
    mov     edi, Buffer                 ; edi = next address to clear

    ; Phase 1: clear leading bytes until edi is 4-byte aligned
_StartByteZero:
    test    edi, 3                      ; low two address bits set?
    jz      _ZeroBlocks                 ; no - pointer is DWORD aligned
    test    ecx, ecx
    jz      _ZeroMemDone                ; ran out of bytes while aligning
    mov     BYTE PTR [edi], 0
    inc     edi
    dec     ecx
    jmp     _StartByteZero

    ; Phase 2: split the remaining count into 64-byte blocks plus a tail
_ZeroBlocks:
    mov     edx, ecx
    and     edx, 63                     ; edx = tail bytes (count mod 64)
    shr     ecx, 6                      ; ecx = number of 64-byte blocks
    jz      _ZeroRemaining              ; shr left ZF set - no full block

    xorps   xmm1, xmm1                  ; xmm1 = 16 bytes of zero

_ZeroBlockLoop:
    movdqu  OWORD PTR ds:[edi], xmm1
    movdqu  OWORD PTR ds:[edi+16], xmm1
    movdqu  OWORD PTR ds:[edi+32], xmm1
    movdqu  OWORD PTR ds:[edi+48], xmm1
    add     edi, 64
    dec     ecx
    jnz     _ZeroBlockLoop

    ; Phase 3: clear the tail held in edx (always less than 64 bytes)
_ZeroRemaining:
    xor     eax, eax                    ; store value for stosd / stosb
    mov     ecx, edx
    shr     ecx, 2                      ; whole DWORDs in the tail
    rep stosd

    mov     ecx, edx
    and     ecx, 3                      ; last 0-3 bytes
    rep stosb

_ZeroMemDone:
  }
}
|