#
# Source path: MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.S
# Source blob: 118497a5201845258f85aa85a5c0bd985c89e6aa
#
#
# ConvertAsm.py: Automatically generated from SetMem.asm
#
#------------------------------------------------------------------------------
#
# Copyright (c) 2006 - 2009, Intel Corporation. All rights reserved.<BR>
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution.  The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
# Module Name:
#
#   SetMem.S
#
# Abstract:
#
#   SetMem function
#
# Notes:
#
#------------------------------------------------------------------------------


#------------------------------------------------------------------------------
#  VOID *
#  EFIAPI
#  InternalMemSetMem (
#    IN VOID   *Buffer,
#    IN UINTN  Count,
#    IN UINT8  Value
#    )
#
#  Fills Count bytes starting at Buffer with Value and returns Buffer.
#
#  Register use, as read from the code below:
#    On entry   rcx = Buffer, rdx = Count, r8b = Value
#    On return  rax = Buffer (held in r9 while the fill runs)
#    Preserved  rdi (push/pop), xmm0 (spilled around the 16-byte loop)
#    Modified   rcx, rdx, r9, flags
#
#  The fill is done in three phases:
#    - Head:  rep stosb up to the next 16-byte boundary, never more than Count
#             bytes (0..15 bytes).
#    - Body:  one movntdq per 16-byte block while whole blocks remain,
#             followed by mfence.
#    - Tail:  rep stosb for the remaining 0..15 bytes.
#
#  NOTE(review): rep stosb advances rdi upward only when the direction flag is
#  clear; this routine does not clear it, so it assumes the caller provides
#  DF = 0 -- confirm against the calling convention in use.
#------------------------------------------------------------------------------
ASM_GLOBAL ASM_PFX(InternalMemSetMem)
ASM_PFX(InternalMemSetMem):
    pushq   %rdi                        # rdi becomes the store pointer; keep the caller value
    movq    %rcx, %rdi                  # rdi <- Buffer
    movb    %r8b, %al                   # al <- Value
    movq    %rdi, %r9                   # r9 <- Buffer as return value
    #
    # Head: rcx <- (-Buffer) & 15, the byte count up to the next 16-byte
    # boundary.  Zero means Buffer is already aligned.
    #
    xorq    %rcx, %rcx
    subq    %rdi, %rcx                  # rcx <- 0 - Buffer
    andq    $15, %rcx                   # rcx + rdi aligns on 16-byte boundary
    jz      L0                          # already aligned: no head bytes to write
    cmpq    %rdx, %rcx
    cmova   %rdx, %rcx                  # rcx <- min(rcx, Count), unsigned
    subq    %rcx, %rdx                  # rdx <- bytes left after the head
    rep     stosb                       # write rcx bytes of al at rdi; rcx ends at 0
L0:
    #
    # Split the remaining byte count in rdx into whole 16-byte blocks (rcx)
    # and a 0..15 byte tail (rdx).
    #
    movq    %rdx, %rcx
    andq    $15, %rdx                   # rdx <- remaining mod 16 (tail bytes)
    shrq    $4, %rcx                    # rcx <- remaining / 16 (block count)
    jz      L_SetBytes                  # no whole block: only the tail is left
    movb    %al, %ah                    # ax <- Value repeats twice
    #
    # Spill xmm0 to rsp+16.  After the single push above, that is the 16 bytes
    # directly above the return address.  movdqa faults on an unaligned
    # address, so this relies on rsp being 16-byte aligned here.
    # NOTE(review): presumably this is the caller-provided home (shadow) space
    # of the Microsoft x64 convention, which matches the rcx/rdx/r8 argument
    # registers used above -- confirm.
    #
    movdqa  %xmm0, 0x10(%rsp)           # save xmm0
    movd    %eax, %xmm0                 # xmm0 bits 0..15 <- Value repeats twice (upper eax bits unused)
    pshuflw $0, %xmm0, %xmm0            # xmm0[0..63] <- Value repeats 8 times
    movlhps %xmm0, %xmm0                # xmm0 <- Value repeats 16 times
L1:
    #
    # This loop only runs when at least 16 bytes remained after the head phase,
    # which means the head phase (when it ran) reached the 16-byte boundary.
    #
    movntdq %xmm0, (%rdi)               # non-temporal 16-byte store; rdi is 16-byte aligned
    add     $16, %rdi                   # advance to the next block
    loop    L1                          # rcx <- rcx - 1; repeat while rcx != 0
    mfence                              # fence after the non-temporal stores, before any later access
    movdqa  0x10(%rsp), %xmm0           # restore xmm0
L_SetBytes:
    #
    # Tail: rdx holds 0..15 here on every path.  The 32-bit move zero-extends
    # into rcx, and rep stosb with rcx = 0 stores nothing.
    #
    movl    %edx, %ecx                  # rcx <- tail byte count; high 32 bits of rcx are zero
    rep     stosb
    movq    %r9, %rax                   # rax <- Return value
    popq    %rdi                        # restore the caller rdi
    ret