summaryrefslogtreecommitdiff
path: root/EDK/Foundation/Library/EfiCommonLib/Ia32
diff options
context:
space:
mode:
Diffstat (limited to 'EDK/Foundation/Library/EfiCommonLib/Ia32')
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm99
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm183
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm169
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm154
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm158
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm138
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm127
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm67
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm86
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm87
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm74
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm72
-rw-r--r--EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm86
13 files changed, 1500 insertions, 0 deletions
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm
new file mode 100644
index 0000000..5fc399f
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm
@@ -0,0 +1,99 @@
+ TITLE DivU64x32.asm: 64-bit division function for IA-32
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; DivU64x32.asm
+;
+; Abstract:
+;
+; 64-bit division function for IA-32
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+DivU64x32 PROTO C Dividend: QWORD, Divisor: DWORD, Remainder: DWORD
+
+DivU64x32 PROC C Dividend: QWORD, Divisor: DWORD, Remainder: DWORD
+
+;------------------------------------------------------------------------------
+; UINT64
+; DivU64x32 (
+;   IN  UINT64  Dividend,
+;   IN  UINTN   Divisor,
+;   OUT UINTN   *Remainder OPTIONAL
+;   )
+;
+; Routine Description:
+;
+;   Divides an unsigned 64-bit value by an unsigned 32-bit value using
+;   bit-serial (shift and subtract) long division.
+;
+; Arguments:
+;
+;   Dividend  - dividend
+;   Divisor   - divisor; any 32-bit value (0 is not trapped, result is garbage)
+;   Remainder - optional buffer for the remainder; may be NULL
+;
+; Returns:
+;
+;   EDX:EAX    = Dividend / Divisor
+;   *Remainder = Dividend mod Divisor (when Remainder is not NULL)
+;
+; ECX is preserved. The Dividend stack slot is used as the working quotient.
+;------------------------------------------------------------------------------
+
+    push    ecx
+    xor     edx, edx                    ; edx = running remainder
+    mov     ecx, 64                     ; one iteration per dividend bit
+
+_DivU64x32_Wend:
+    ; Shift the next dividend bit (most significant first) into the remainder
+    shl     dword ptr Dividend[0], 1
+    rcl     dword ptr Dividend[4], 1
+    rcl     edx, 1
+
+    ; A carry out of edx means the 33-bit remainder is >= 2^32, hence larger
+    ; than any 32-bit divisor. Testing it is what makes divisors with bit 31
+    ; set work; the wrapped 32-bit subtraction below is still exact.
+    jc      _DivU64x32_Sub
+    cmp     edx, Divisor
+    jb      _DivU64x32_Cont
+
+_DivU64x32_Sub:
+    bts     dword ptr Dividend[0], 0    ; quotient bit = 1
+    sub     edx, Divisor
+
+_DivU64x32_Cont:
+    dec     ecx
+    jnz     _DivU64x32_Wend
+
+    mov     eax, Remainder
+    test    eax, eax
+    jz      _DivU64x32_Done             ; caller did not ask for the remainder
+    mov     dword ptr [eax], edx
+
+_DivU64x32_Done:
+    mov     eax, dword ptr Dividend[0]
+    mov     edx, dword ptr Dividend[4]
+    pop     ecx
+    ret
+
+DivU64x32 ENDP
+
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm
new file mode 100644
index 0000000..504c08a
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm
@@ -0,0 +1,183 @@
+ TITLE EfiCopyMem.asm: Optimized memory-copy routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiCopyMem.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized CopyMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .MMX
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+
+;------------------------------------------------------------------------------
+; VOID
+; EfiCommonLibCopyMem (
+; IN VOID *Destination,
+; IN VOID *Source,
+; IN UINTN Count
+; )
+;------------------------------------------------------------------------------
+
+EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+  LOCAL MmxSave:QWORD
+
+  ;
+  ; Copies Count bytes from Source to Destination. Overlapping ranges are
+  ; handled by choosing the copy direction.
+  ;
+  ; Register roles:
+  ;   esi = read cursor  (saved and restored)
+  ;   edi = write cursor (saved and restored)
+  ;   ecx = bytes still to copy
+  ;   eax = scratch, and the 64-byte block counter
+  ;
+  ; mm0 is saved and restored around the block loop, and EMMS is executed
+  ; when that loop ran.
+  ;
+  push    esi
+  push    edi
+  mov     edi, Destination
+  mov     esi, Source
+  mov     ecx, Count
+
+  ; Pick the copy direction (all comparisons are unsigned):
+  ;   Source + Count <= Destination  => disjoint, copy forward
+  ;   Destination + Count <= Source  => disjoint, copy forward
+  ;   Source == Destination          => nothing to do
+  ;   Source <  Destination          => overlapping, copy backward
+  ;   Source >  Destination          => overlapping, forward copy is safe
+  lea     eax, [esi + ecx]              ; Source + Count
+  cmp     eax, edi
+  jbe     _AlignDestination
+
+  lea     eax, [edi + ecx]              ; Destination + Count
+  cmp     eax, esi
+  jbe     _AlignDestination
+
+  cmp     esi, edi
+  je      _CopyMemDone
+  jb      _CopyBackward
+
+  ; Forward copy, stage 1: single bytes until Destination is 4-byte aligned
+_AlignDestination:
+  test    ecx, ecx
+  jz      _CopyMemDone                  ; nothing (left) to copy
+  test    edi, 3
+  jz      SHORT _CopyBlocks             ; Destination is aligned
+
+  mov     al, BYTE PTR [esi]
+  mov     BYTE PTR [edi], al
+  inc     esi
+  inc     edi
+  dec     ecx
+  jmp     _AlignDestination
+
+  ; Stage 2: whole 64-byte blocks, moved through mm0 eight bytes at a time
+_CopyBlocks:
+  mov     eax, ecx
+  shr     eax, 6                        ; eax = number of 64-byte blocks
+  and     ecx, 63                       ; ecx = bytes left after the blocks
+  test    eax, eax
+  jz      _CopyDwords                   ; less than one block
+
+  movq    MmxSave, mm0                  ; restored after the loop
+
+_CopyBlockLoop:
+  movq    mm0, QWORD PTR ds:[esi]
+  movq    QWORD PTR ds:[edi], mm0
+  movq    mm0, QWORD PTR ds:[esi+8]
+  movq    QWORD PTR ds:[edi+8], mm0
+  movq    mm0, QWORD PTR ds:[esi+16]
+  movq    QWORD PTR ds:[edi+16], mm0
+  movq    mm0, QWORD PTR ds:[esi+24]
+  movq    QWORD PTR ds:[edi+24], mm0
+  movq    mm0, QWORD PTR ds:[esi+32]
+  movq    QWORD PTR ds:[edi+32], mm0
+  movq    mm0, QWORD PTR ds:[esi+40]
+  movq    QWORD PTR ds:[edi+40], mm0
+  movq    mm0, QWORD PTR ds:[esi+48]
+  movq    QWORD PTR ds:[edi+48], mm0
+  movq    mm0, QWORD PTR ds:[esi+56]
+  movq    QWORD PTR ds:[edi+56], mm0
+
+  add     esi, 64
+  add     edi, 64
+  dec     eax
+  jnz     _CopyBlockLoop
+
+  movq    mm0, MmxSave
+  emms                                  ; leave MMX state
+
+  ; Stage 3: remaining whole dwords
+_CopyDwords:
+  cmp     ecx, 4
+  jb      _CopyTailBytes
+
+  mov     eax, DWORD PTR [esi]
+  mov     DWORD PTR [edi], eax
+  add     esi, 4
+  add     edi, 4
+  sub     ecx, 4
+  jmp     _CopyDwords
+
+  ; Stage 4: the last 0-3 bytes
+_CopyTailBytes:
+  test    ecx, ecx
+  jz      _CopyMemDone
+
+  mov     al, BYTE PTR [esi]
+  mov     BYTE PTR [edi], al
+  inc     esi
+  inc     edi
+  dec     ecx
+  jmp     SHORT _CopyTailBytes
+
+  ;
+  ; Backward copy, used when Destination overlaps Source from above: start
+  ; at the last byte of each range and walk down one byte at a time. This
+  ; is the uncommon case, so it is deliberately left as a plain byte loop
+  ; with no block or dword stage.
+  ;
+_CopyBackward:
+  lea     esi, [esi + ecx - 1]          ; last Source byte
+  lea     edi, [edi + ecx - 1]          ; last Destination byte
+
+_CopyBackwardLoop:
+  test    ecx, ecx
+  jz      _CopyMemDone
+
+  mov     al, BYTE PTR [esi]
+  mov     BYTE PTR [edi], al
+  dec     esi
+  dec     edi
+  dec     ecx
+  jmp     _CopyBackwardLoop
+
+_CopyMemDone:
+  pop     edi
+  pop     esi
+
+  ret
+
+EfiCommonLibCopyMem ENDP
+ END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm
new file mode 100644
index 0000000..d5000d0
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm
@@ -0,0 +1,169 @@
+ TITLE EfiCopyMem.asm: Optimized memory-copy routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiCopyMemSSE2.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized CopyMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+
+;------------------------------------------------------------------------------
+; VOID
+; EfiCommonLibCopyMem (
+; IN VOID *Destination,
+; IN VOID *Source,
+; IN UINTN Count
+; )
+;------------------------------------------------------------------------------
+
+EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD
+
+  ; esi = read cursor, edi = write cursor, ecx = bytes still to copy
+  push    esi
+  push    edi
+  mov     ecx, Count
+  mov     esi, Source
+  mov     edi, Destination
+
+  ; Pick the copy direction. Addresses are unsigned, so every branch below
+  ; uses the unsigned conditions (JBE / JB); the signed ones would misorder
+  ; buffers that lie at or above 80000000h.
+  ;   disjoint ranges, or Source > Destination  => forward copy
+  ;   Source == Destination => nothing to do; Source < Destination => reverse
+  mov     eax, esi
+  add     eax, ecx                      ; Source + Count
+  cmp     eax, edi
+  jbe     _StartByteCopy
+
+  mov     eax, edi
+  add     eax, ecx                      ; Dest + Count
+  cmp     eax, esi
+  jbe     _StartByteCopy
+
+  cmp     esi, edi
+  je      _CopyMemDone
+  jb      _CopyOverlapped               ; overlapping with Dest above Source
+
+  ; Pick up misaligned start bytes to get destination pointer 4-byte aligned
+_StartByteCopy:
+  test    ecx, ecx
+  jz      _CopyMemDone                  ; Count == 0, all done
+  mov     edx, edi
+  and     dl, 3                         ; check lower 2 bits of address
+  test    dl, dl
+  je      SHORT _CopyBlocks             ; already aligned?
+
+  ; Copy a byte
+  mov     al, BYTE PTR [esi]            ; get byte from Source
+  mov     BYTE PTR [edi], al            ; write byte to Destination
+  dec     ecx
+  inc     edi
+  inc     esi
+  jmp     _StartByteCopy                ; back to top of loop
+
+_CopyBlocks:
+  ; Compute how many 64-byte blocks we can copy
+  mov     eax, ecx                      ; get Count in eax
+  shr     eax, 6                        ; convert to 64-byte count
+  shl     eax, 6                        ; convert back to bytes
+  sub     ecx, eax                      ; ecx = bytes left after the blocks
+  shr     eax, 6                        ; and this is how many 64-byte blocks
+
+  ; If no 64-byte blocks, then skip
+  test    eax, eax
+  jz      _CopyRemainingDWords
+
+  ; Destination is only 4-byte aligned here, hence the unaligned MOVDQU
+copyxmm:
+
+  movdqu  xmm0, OWORD PTR ds:[esi]
+  movdqu  OWORD PTR ds:[edi], xmm0
+  movdqu  xmm1, OWORD PTR ds:[esi+16]
+  movdqu  OWORD PTR ds:[edi+16], xmm1
+  movdqu  xmm2, OWORD PTR ds:[esi+32]
+  movdqu  OWORD PTR ds:[edi+32], xmm2
+  movdqu  xmm3, OWORD PTR ds:[esi+48]
+  movdqu  OWORD PTR ds:[edi+48], xmm3
+
+  add     edi, 64
+  add     esi, 64
+  dec     eax
+  jnz     copyxmm
+
+
+  ; Copy as many DWORDS as possible
+_CopyRemainingDWords:
+  cmp     ecx, 4
+  jb      _CopyRemainingBytes
+
+  mov     eax, DWORD PTR [esi]          ; get dword from Source
+  mov     DWORD PTR [edi], eax          ; write dword to Destination
+  sub     ecx, 4                        ; decrement Count
+  add     esi, 4                        ; advance Source pointer
+  add     edi, 4                        ; advance Destination pointer
+  jmp     _CopyRemainingDWords          ; back to top
+
+_CopyRemainingBytes:
+  test    ecx, ecx
+  jz      _CopyMemDone
+  mov     al, BYTE PTR [esi]            ; get byte from Source
+  mov     BYTE PTR [edi], al            ; write byte to Destination
+  dec     ecx
+  inc     esi
+  inc     edi                           ; advance Destination pointer
+  jmp     SHORT _CopyRemainingBytes     ; back to top of loop
+
+  ;
+  ; We do this block if the buffers overlap with Destination above Source.
+  ; To handle it, copy starting at the end of the source buffer and work
+  ; backwards. Since this is the atypical case, this code has not been
+  ; optimized, and thus simply copies bytes.
+  ;
+_CopyOverlapped:
+
+  ; Move the source and destination pointers to the last byte of each range
+  add     esi, ecx                      ; Source + Count
+  dec     esi
+  add     edi, ecx                      ; Dest + Count
+  dec     edi
+
+_CopyOverlappedLoop:
+  test    ecx, ecx
+  jz      _CopyMemDone
+  mov     al, BYTE PTR [esi]            ; get byte from Source
+  mov     BYTE PTR [edi], al            ; write byte to Destination
+  dec     ecx
+  dec     esi
+  dec     edi
+  jmp     _CopyOverlappedLoop           ; back to top of loop
+
+_CopyMemDone:
+  pop     edi
+  pop     esi
+
+  ret
+
+EfiCommonLibCopyMem ENDP
+ END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm
new file mode 100644
index 0000000..3b08c37
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm
@@ -0,0 +1,154 @@
+ TITLE EfiSetMem.asm: Optimized setmemory routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiSetMem.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized SetMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .MMX
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibSetMem PROTO C Buffer:PTR DWORD, Count:DWORD, Value:BYTE
+
+;------------------------------------------------------------------------------
+; Procedure: EfiCommonLibSetMem
+;
+; VOID
+; EfiCommonLibSetMem (
+; IN VOID *Buffer,
+; IN UINTN Count,
+; IN UINT8 Value
+; )
+;
+; Input: VOID *Buffer - Pointer to buffer to write
+; UINTN Count - Number of bytes to write
+; UINT8 Value - Value to write
+;
+; Output: None.
+;
+; Saves:
+;
+; Modifies:
+;
+; Description: This function is an optimized set-memory (fill) function.
+;
+; Notes: For counts of 256 bytes or more, this function first fills single
+; bytes up to an 8-byte boundary, then fills 64-byte blocks using
+; 8-byte MMX stores. Whatever remains is filled as dwords, then bytes.
+;
+;------------------------------------------------------------------------------
+EfiCommonLibSetMem PROC C Buffer:PTR DWORD, Count:DWORD, Value:BYTE
+  LOCAL QWordValue:QWORD
+  LOCAL MmxSave:QWORD
+
+  ; Fills Count bytes starting at Buffer with Value.
+  ;
+  ; Register roles:
+  ;   edi = write cursor (saved and restored)
+  ;   edx = bytes still to fill
+  ;   bx  = Value in both bytes (bl = bh = Value); ebx is saved and restored
+  ;
+  ; Counts below 256 go straight to the REP STOS tail. Larger counts first
+  ; advance to an 8-byte boundary, then fill 64-byte blocks through mm0.
+  ; REP STOS is used as-is, so the direction flag is expected to be clear.
+  mov     edx, Count
+  test    edx, edx
+  jz      _SetMemDone                   ; Count == 0: nothing pushed yet
+
+  push    edi
+  push    ebx
+
+  mov     edi, Buffer
+  mov     bl, Value
+  mov     bh, bl
+
+  cmp     edx, 256
+  jb      _SetTail
+
+  test    edi, 7
+  jz      _SetBlocks                    ; already 8-byte aligned
+
+  ; ecx = distance to the next 8-byte boundary (1..7 on this path)
+  mov     ecx, edi
+  neg     ecx
+  and     ecx, 7
+  cmp     ecx, edx
+  jnb     _SetTail
+
+  sub     edx, ecx
+  mov     al, bl
+  rep stosb
+
+_SetBlocks:
+  mov     eax, edx
+  shr     eax, 6                        ; eax = number of 64-byte blocks
+  jz      _SetTail
+  and     edx, 63                       ; edx = bytes left after the blocks
+
+  ; Build eight copies of Value in QWordValue
+  mov     ecx, ebx
+  shl     ecx, 16
+  mov     cx, bx
+  mov     DWORD PTR QWordValue[0], ecx
+  mov     DWORD PTR QWordValue[4], ecx
+
+  movq    MmxSave, mm0                  ; restored after the loop
+  movq    mm0, QWordValue
+
+@@:
+  movq    QWORD PTR ds:[edi], mm0
+  movq    QWORD PTR ds:[edi+8], mm0
+  movq    QWORD PTR ds:[edi+16], mm0
+  movq    QWORD PTR ds:[edi+24], mm0
+  movq    QWORD PTR ds:[edi+32], mm0
+  movq    QWORD PTR ds:[edi+40], mm0
+  movq    QWORD PTR ds:[edi+48], mm0
+  movq    QWORD PTR ds:[edi+56], mm0
+  add     edi, 64
+  dec     eax
+  jnz     @B
+
+  movq    mm0, MmxSave
+  emms                                  ; leave MMX state
+
+_SetTail:
+  ; eax = Value in all four bytes; whole dwords first, then 0-3 bytes
+  mov     eax, ebx
+  shl     eax, 16
+  mov     ax, bx
+  mov     ecx, edx
+  shr     ecx, 2
+  rep stosd
+
+  mov     ecx, edx
+  and     ecx, 3
+  rep stosb
+
+  pop     ebx
+  pop     edi
+
+_SetMemDone:
+  ret     0
+
+EfiCommonLibSetMem ENDP
+ END
+ \ No newline at end of file
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm
new file mode 100644
index 0000000..5d6652d
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm
@@ -0,0 +1,158 @@
+ TITLE EfiSetMem.asm: Optimized setmemory routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiSetMemSSE2.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized SetMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .MMX
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibSetMem PROTO C Buffer:PTR DWORD, Count:DWORD, Value:BYTE
+
+;------------------------------------------------------------------------------
+; Procedure: EfiCommonLibSetMem
+;
+; VOID
+; EfiCommonLibSetMem (
+; IN VOID *Buffer,
+; IN UINTN Count,
+; IN UINT8 Value
+; )
+;
+; Input: VOID *Buffer - Pointer to buffer to write
+; UINTN Count - Number of bytes to write
+; UINT8 Value - Value to write
+;
+; Output: None.
+;
+; Saves:
+;
+; Modifies:
+;
+; Description: This function is an optimized set-memory (fill) function.
+;
+; Notes: For counts of 256 bytes or more, this function first fills single
+; bytes up to a 16-byte boundary, then fills 128-byte blocks using aligned
+; 16-byte SSE2 stores. Whatever remains is filled as dwords, then bytes.
+;
+;------------------------------------------------------------------------------
+EfiCommonLibSetMem PROC C Buffer:PTR DWORD, Count:DWORD, Value:BYTE
+
+  ;
+  ; VOID
+  ; EfiCommonLibSetMem (VOID *Buffer, UINTN Count, UINT8 Value)
+  ;
+  ; Fills Count bytes starting at Buffer with Value.
+  ;
+  ; Arguments:
+  ;   Buffer - first byte to write
+  ;   Count  - number of bytes to write; 0 returns immediately
+  ;   Value  - byte value to store
+  ;
+  ; Register roles:
+  ;   edi  = write cursor (saved and restored)
+  ;   edx  = bytes still to fill
+  ;   eax  = Value replicated into all four bytes (al alone feeds STOSB)
+  ;   ecx  = REP / block counter
+  ;   xmm1 = Value replicated into all sixteen bytes (not preserved)
+  ;
+  ; Counts below 256 go straight to the REP STOS tail. Larger counts first
+  ; advance to a 16-byte boundary, because MOVDQA requires an aligned
+  ; address, and then fill 128-byte blocks.
+  ;
+  ; The fill pattern is built directly in xmm1, so no MMX register is
+  ; touched, no EMMS is needed and no stack locals are required.
+  ;
+  ; REP STOS is used as-is, so the direction flag is expected to be clear.
+  ; Clobbers eax, ecx, edx, xmm1 and flags.
+  ;
+
+  mov     edx, Count
+  test    edx, edx
+  jz      _SetMemDone                   ; nothing pushed yet on this path
+
+  push    edi
+  mov     edi, Buffer
+
+  movzx   eax, Value
+  imul    eax, eax, 01010101h           ; eax = Value in every byte
+
+  ; Small fills are not worth the alignment work
+  cmp     edx, 256
+  jb      _SetTail
+
+  ; Byte-fill up to the next 16-byte boundary. ecx is 0..15, always less
+  ; than edx (>= 256) here, and REP with ecx = 0 stores nothing.
+  mov     ecx, edi
+  neg     ecx
+  and     ecx, 15
+  sub     edx, ecx
+  rep stosb
+
+  ; Split what is left into 128-byte blocks plus a tail of 0..127 bytes
+  mov     ecx, edx
+  shr     ecx, 7                        ; ecx = number of 128-byte blocks
+  jz      _SetTail                      ; defensive: edx >= 241 here
+  and     edx, 127                      ; edx = tail length
+
+  ; xmm1 = sixteen copies of Value
+  movd    xmm1, eax
+  pshufd  xmm1, xmm1, 0                 ; broadcast dword 0 to all dwords
+
+  ; Eight aligned 16-byte stores per iteration
+@@:
+  movdqa  OWORD PTR ds:[edi], xmm1
+  movdqa  OWORD PTR ds:[edi+16], xmm1
+  movdqa  OWORD PTR ds:[edi+32], xmm1
+  movdqa  OWORD PTR ds:[edi+48], xmm1
+  movdqa  OWORD PTR ds:[edi+64], xmm1
+  movdqa  OWORD PTR ds:[edi+80], xmm1
+  movdqa  OWORD PTR ds:[edi+96], xmm1
+  movdqa  OWORD PTR ds:[edi+112], xmm1
+
+  add     edi, 128
+  dec     ecx
+  jnz     @B
+
+_SetTail:
+  ; Remaining whole dwords, then the last 0-3 bytes
+  mov     ecx, edx
+  shr     ecx, 2
+  rep stosd
+
+  mov     ecx, edx
+  and     ecx, 3
+  rep stosb
+
+  pop     edi
+
+_SetMemDone:
+  ; Plain RET: the assembler emits the standard epilogue for this PROC, and
+  ; with the C convention the caller removes the arguments.
+  ret
+
+EfiCommonLibSetMem ENDP
+ END
+ \ No newline at end of file
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm
new file mode 100644
index 0000000..7188e29
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm
@@ -0,0 +1,138 @@
+ TITLE EfiZeroMem.asm: Optimized memory-zero routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiZeroMem.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized ZeroMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .MMX
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibZeroMem PROTO C Buffer:PTR DWORD, Count:DWORD
+
+;------------------------------------------------------------------------------
+; Procedure: EfiCommonLibZeroMem
+;
+; VOID
+; EfiCommonLibZeroMem (
+; IN VOID *Buffer,
+; IN UINTN Count
+; )
+;
+; Input: VOID *Buffer - Pointer to buffer to clear
+; UINTN Count - Number of bytes to clear
+;
+; Output: None.
+;
+; Saves:
+;
+; Modifies:
+;
+; Description: This function is an optimized zero-memory function.
+;
+; Notes: This function first clears single bytes until the pointer is 4-byte
+; aligned, then clears 64-byte blocks using 8-byte MMX stores, and
+; finally clears what remains as dwords, then bytes.
+;
+;------------------------------------------------------------------------------
+EfiCommonLibZeroMem PROC C Buffer:PTR DWORD, Count:DWORD
+  LOCAL MmxSave:QWORD
+
+  ;
+  ; Zeroes Count bytes starting at Buffer.
+  ;
+  ; Arguments:
+  ;   Buffer - first byte to clear
+  ;   Count  - number of bytes to clear
+  ;
+  ; Register roles:
+  ;   edi = write cursor (saved and restored)
+  ;   ecx = bytes still to clear, later the block / REP counter
+  ;   edx = bytes left over after the 64-byte blocks
+  ;   eax = zero for the REP STOS tail
+  ;
+  ; mm0 is saved and restored around the block loop, and EMMS is executed
+  ; when that loop ran. REP STOS is used as-is, so the direction flag is
+  ; expected to be clear.
+  ; eax, ecx and edx are not preserved.
+  ;
+  push    edi
+  mov     edi, Buffer
+  mov     ecx, Count
+
+  ; Stage 1: single bytes until the cursor is 4-byte aligned
+_AlignCursor:
+  test    edi, 3
+  jz      _ZeroBlocks                   ; cursor is aligned
+  test    ecx, ecx
+  jz      _ZeroMemDone                  ; ran out of bytes before aligning
+
+  mov     BYTE PTR [edi], 0
+  inc     edi
+  dec     ecx
+  jmp     _AlignCursor
+
+  ; Stage 2: 64-byte blocks, eight 8-byte MMX stores per block
+_ZeroBlocks:
+  mov     edx, ecx
+  and     edx, 63                       ; edx = bytes left after the blocks
+  shr     ecx, 6                        ; ecx = number of 64-byte blocks
+  jz      _ZeroTail                     ; less than one block
+
+  movq    MmxSave, mm0                  ; restored after the loop
+  pxor    mm0, mm0                      ; mm0 = 0
+
+@@:
+  movq    QWORD PTR ds:[edi], mm0
+  movq    QWORD PTR ds:[edi+8], mm0
+  movq    QWORD PTR ds:[edi+16], mm0
+  movq    QWORD PTR ds:[edi+24], mm0
+  movq    QWORD PTR ds:[edi+32], mm0
+  movq    QWORD PTR ds:[edi+40], mm0
+  movq    QWORD PTR ds:[edi+48], mm0
+  movq    QWORD PTR ds:[edi+56], mm0
+
+  add     edi, 64
+  dec     ecx
+  jnz     @B
+
+  movq    mm0, MmxSave
+  emms                                  ; leave MMX state
+
+  ; Stage 3: remaining whole dwords, then the last 0-3 bytes
+_ZeroTail:
+  xor     eax, eax
+  mov     ecx, edx
+  shr     ecx, 2
+  rep stosd
+
+  mov     ecx, edx
+  and     ecx, 3
+  rep stosb
+
+_ZeroMemDone:
+  pop     edi
+
+  ret
+
+EfiCommonLibZeroMem ENDP
+ END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm
new file mode 100644
index 0000000..a5efd4d
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm
@@ -0,0 +1,127 @@
+ TITLE EfiZeroMem.asm: Optimized memory-zero routine
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; EfiZeroMemSSE2.asm
+;
+; Abstract:
+;
+; This is the code that supports IA32-optimized ZeroMem service
+;
+;------------------------------------------------------------------------------
+
+; PROC:PRIVATE
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+EfiCommonLibZeroMem PROTO C Buffer:PTR DWORD, Count:DWORD
+
+;------------------------------------------------------------------------------
+; Procedure: EfiCommonLibZeroMem
+;
+; VOID
+; EfiCommonLibZeroMem (
+; IN VOID *Buffer,
+; IN UINTN Count
+; )
+;
+; Input: VOID *Buffer - Pointer to buffer to clear
+; UINTN Count - Number of bytes to clear
+;
+; Output: None.
+;
+; Saves:
+;
+; Modifies:
+;
+; Description: This function is an optimized zero-memory function.
+;
+; Notes: This function first clears single bytes until the pointer is 4-byte
+; aligned, then clears 64-byte blocks using unaligned 16-byte SSE2
+; stores, and finally clears what remains as dwords, then bytes.
+;
+;------------------------------------------------------------------------------
+EfiCommonLibZeroMem PROC C Buffer:PTR DWORD, Count:DWORD
+
+  ;
+  ; Zeroes Count bytes starting at Buffer.
+  ;
+  ; Arguments:
+  ;   Buffer - first byte to clear
+  ;   Count  - number of bytes to clear
+  ;
+  ; Register roles:
+  ;   edi  = write cursor (saved and restored)
+  ;   ecx  = bytes still to clear, later the block / REP counter
+  ;   edx  = bytes left over after the 64-byte blocks
+  ;   eax  = zero for the REP STOS tail
+  ;   xmm1 = zero for the 16-byte stores (not preserved)
+  ;
+  ; The cursor is only brought to 4-byte alignment, so the block loop uses
+  ; the unaligned store MOVDQU. REP STOS is used as-is, so the direction
+  ; flag is expected to be clear.
+  ;
+  push    edi
+  mov     edi, Buffer
+  mov     ecx, Count
+
+  ; Stage 1: single bytes until the cursor is 4-byte aligned
+_AlignCursor:
+  test    edi, 3
+  jz      _ZeroBlocks                   ; cursor is aligned
+  test    ecx, ecx
+  jz      _ZeroMemDone                  ; ran out of bytes before aligning
+
+  mov     BYTE PTR [edi], 0
+  inc     edi
+  dec     ecx
+  jmp     _AlignCursor
+
+  ; Stage 2: 64-byte blocks, four 16-byte stores per block
+_ZeroBlocks:
+  mov     edx, ecx
+  and     edx, 63                       ; edx = bytes left after the blocks
+  shr     ecx, 6                        ; ecx = number of 64-byte blocks
+  jz      _ZeroTail                     ; less than one block
+
+  xorps   xmm1, xmm1                    ; xmm1 = 0
+
+@@:
+  movdqu  OWORD PTR ds:[edi], xmm1
+  movdqu  OWORD PTR ds:[edi+16], xmm1
+  movdqu  OWORD PTR ds:[edi+32], xmm1
+  movdqu  OWORD PTR ds:[edi+48], xmm1
+  add     edi, 64
+  dec     ecx
+  jnz     @B
+
+  ; Stage 3: remaining whole dwords, then the last 0-3 bytes
+_ZeroTail:
+  xor     eax, eax
+  mov     ecx, edx
+  shr     ecx, 2
+  rep stosd
+
+  mov     ecx, edx
+  and     ecx, 3
+  rep stosb
+
+_ZeroMemDone:
+  pop     edi
+  ret
+
+EfiCommonLibZeroMem ENDP
+ END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm
new file mode 100644
index 0000000..9f0d962
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm
@@ -0,0 +1,67 @@
+ TITLE GetPowerOfTwo.asm: Calculates the power of two value just below input
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2005 Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; GetPowerOfTwo.asm
+;
+; Abstract:
+;
+; Calculates the largest integer that is both
+; a power of two and less than or equal to Input
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+_GetPowerOfTwo PROC
+;------------------------------------------------------------------------------
+; _GetPowerOfTwo (Input)
+;
+; Routine Description:
+;
+;   Returns the value of the highest set bit of a 64-bit Input, i.e. the
+;   largest power of two that is less than or equal to Input.
+;
+; Arguments:
+;
+;   Input - read from the stack as two dwords: [esp + 4] is treated as the
+;           low half and [esp + 8] as the high half
+;
+; Returns:
+;
+;   EDX:EAX = 1 shifted left by the index of the highest set bit of Input,
+;             or 0 when Input is 0. ECX is clobbered.
+;------------------------------------------------------------------------------
+    mov     ecx, [esp + 8]              ; high half of Input
+    xor     eax, eax
+    xor     edx, edx                    ; result = 0
+    test    ecx, ecx
+    jnz     _GetPowerOfTwo_HighHalf
+    mov     ecx, [esp + 4]              ; high half is 0: use the low half
+    jecxz   _GetPowerOfTwo_Exit         ; Input == 0 -> return 0
+    bsr     ecx, ecx                    ; index of the highest set bit
+    bts     eax, ecx
+    ret
+
+_GetPowerOfTwo_HighHalf:
+    bsr     ecx, ecx
+    bts     edx, ecx
+_GetPowerOfTwo_Exit:
+    ret
+_GetPowerOfTwo ENDP
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm
new file mode 100644
index 0000000..de6d17e
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm
@@ -0,0 +1,86 @@
+ TITLE LShiftU64.asm: 64-bit left shift function for IA-32
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; LShiftU64.asm
+;
+; Abstract:
+;
+; 64-bit left shift function for IA-32
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+LShiftU64 PROTO C Operand: QWORD, Count: DWORD
+
+LShiftU64 PROC C Operand: QWORD, Count: DWORD
+
+;------------------------------------------------------------------------------
+; UINT64
+; LShiftU64 (
+;   IN UINT64   Operand,
+;   IN UINTN    Count
+;   )
+;
+; Routine Description:
+;
+;   Shifts a 64-bit value left by Count bits and returns the result.
+;   Only bits 0-5 of Count are used, so the effective shift is 0 - 63.
+;
+; Arguments:
+;
+;   Operand - value to be shifted
+;   Count   - number of bit positions to shift left
+;
+; Returns:
+;
+;   EDX:EAX = Operand << (Count AND 63). ECX is preserved.
+;------------------------------------------------------------------------------
+
+    push    ecx
+
+    mov     ecx, Count
+    mov     eax, dword ptr Operand[0]
+    mov     edx, dword ptr Operand[4]
+    and     ecx, 63
+
+    ;
+    ; SHLD and SHL only honour the low five bits of CL, so these two
+    ; instructions shift EDX:EAX by (Count AND 31).
+    ;
+    shld    edx, eax, cl
+    shl     eax, cl
+
+    ;
+    ; Bit 5 of the count selects a shift of 32 or more: the low dword
+    ; (already shifted by Count - 32) becomes the high dword and the low
+    ; dword is cleared.
+    ;
+    test    cl, 32
+    jz      short _LShiftU64_Exit
+
+    mov     edx, eax
+    xor     eax, eax
+
+_LShiftU64_Exit:
+    pop     ecx
+    ret
+
+LShiftU64 ENDP
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm
new file mode 100644
index 0000000..167086e
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm
@@ -0,0 +1,87 @@
+ TITLE Log2.asm: 64-bit integer logarithm function for IA-32
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; Log2.asm
+;
+; Abstract:
+;
+; 64-bit integer logarithm function for IA-32
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+Log2 PROTO C Operand: QWORD
+
+Log2 PROC C Operand: QWORD
+
+;------------------------------------------------------------------------------
+; UINT8
+; Log2 (
+;   IN UINT64   Operand
+;   )
+;
+; Routine Description:
+;
+;   Calculates the base-2 logarithm of Operand, rounded down, i.e. the
+;   index of the most significant set bit of Operand.
+;
+; Arguments:
+;
+;   Operand - value to calculate logarithm
+;
+; Returns:
+;
+;   AL = the largest integer that is less than or equal to the base-2
+;   logarithm of Operand (0 - 63).
+;   AL = 0FFh if Operand is zero.
+;
+;   The result is zero-extended into EAX; ECX is not modified.
+;------------------------------------------------------------------------------
+
+  ;
+  ; bsr stores the index of the highest set bit of its source in the
+  ; destination and sets ZF when the source is zero (the destination is
+  ; then undefined, so it is always overwritten before returning).
+  ;
+  ; A non-zero upper dword decides the result: bit n there is bit n + 32
+  ; of Operand.
+  ;
+  bsr    eax, dword ptr Operand[4]
+  jz     short _Log2_LowDword
+  add    eax, 32
+  jmp    short _Log2_Done
+
+_Log2_LowDword:
+  ;
+  ; Upper dword is zero, so the answer comes from the lower dword alone.
+  ;
+  bsr    eax, dword ptr Operand[0]
+  jnz    short _Log2_Done
+
+  ;
+  ; Operand is zero: the logarithm is undefined, report 0FFh.
+  ;
+  mov    eax, 0FFh
+
+_Log2_Done:
+  ret
+
+Log2 ENDP
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm
new file mode 100644
index 0000000..351020e
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm
@@ -0,0 +1,74 @@
+ TITLE MultU64x32.asm: 64-bit Multiplication function for IA-32
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; MultU64x32.asm
+;
+; Abstract:
+;
+; 64-bit Multiplication function for IA-32
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL, C
+ .CODE
+
+MultU64x32 PROTO C Multiplicand: QWORD, Multiplier: DWORD
+
+MultU64x32 PROC C Multiplicand: QWORD, Multiplier: DWORD
+
+;------------------------------------------------------------------------------
+; UINT64
+; MultU64x32 (
+;   IN UINT64   Multiplicand,
+;   IN UINTN    Multiplier
+;   )
+;
+; Routine Description:
+;
+;   Multiplies a 64-bit value by a 32-bit value and returns the low 64 bits
+;   of the product. Overflow beyond 64 bits is not detected; the excess
+;   bits are simply dropped.
+;
+; Arguments:
+;
+;   Multiplicand  - multiplicand
+;   Multiplier    - multiplier
+;
+; Returns:
+;
+;   EDX:EAX = low 64 bits of Multiplicand * Multiplier.
+;   ECX is not used by this routine.
+;------------------------------------------------------------------------------
+
+  ;
+  ; Upper dword * Multiplier only contributes its low 32 bits (to the upper
+  ; dword of the result). The two-operand imul yields exactly those bits
+  ; without touching EDX; park the value on the stack.
+  ;
+  mov    eax, dword ptr Multiplicand[4]
+  imul   eax, Multiplier
+  push   eax
+
+  ;
+  ; Lower dword * Multiplier gives a full 64-bit partial product in EDX:EAX.
+  ;
+  mov    eax, dword ptr Multiplicand[0]
+  mul    Multiplier
+
+  ;
+  ; Fold the parked cross term into the upper dword and drop the stack slot.
+  ;
+  add    edx, dword ptr [esp]
+  add    esp, 4
+  ret
+
+MultU64x32 ENDP
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm
new file mode 100644
index 0000000..0346e03
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm
@@ -0,0 +1,72 @@
+ TITLE Power10U64.asm: calculates Operand * 10 ^ Power
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; Power10U64.asm
+;
+; Abstract:
+;
+; Calculates Operand * 10 ^ Power
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+Power10U64 PROTO C Operand: QWORD, Power: DWORD
+MultU64x32 PROTO C Multiplicand: QWORD, Multiplier: DWORD
+
+Power10U64 PROC C Operand: QWORD, Power: DWORD
+
+;------------------------------------------------------------------------------
+; UINT64
+; Power10U64 (
+;   IN UINT64   Operand,
+;   IN UINTN    Power
+;   )
+;
+; Routine Description:
+;
+;   Raise 10 to the power of Power, and multiply the result with Operand.
+;   Implemented as Power successive multiplications by 10 through MultU64x32.
+;
+; Arguments:
+;
+;   Operand - multiplicand
+;   Power   - power
+;
+; Returns:
+;
+;   EDX:EAX = Operand * 10 ^ Power (Operand itself when Power is 0).
+;   ECX is preserved.
+;------------------------------------------------------------------------------
+
+  push   ecx
+
+  ;
+  ; Seed the result with Operand so that a Power of 0 returns Operand
+  ; instead of whatever happened to be in EDX:EAX on entry.
+  ;
+  mov    eax, dword ptr Operand[0]
+  mov    edx, dword ptr Operand[4]
+
+  ;
+  ; Test all 32 bits of the counter. (jcxz is not suitable here: in 32-bit
+  ; code it examines only CX, so a Power whose low 16 bits are zero would
+  ; skip the loop.)
+  ;
+  mov    ecx, Power
+  test   ecx, ecx
+  jz     short _Power10U64_Done
+
+_Power10U64_Wend:
+  ;
+  ; Operand doubles as the running product. ECX has to survive the call;
+  ; this routine relies on MultU64x32 not modifying it.
+  ;
+  invoke MultU64x32, Operand, 10
+  mov    dword ptr Operand[0], eax
+  mov    dword ptr Operand[4], edx
+  dec    ecx
+  jnz    short _Power10U64_Wend
+
+_Power10U64_Done:
+  pop    ecx
+  ret
+
+Power10U64 ENDP
+
+END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm
new file mode 100644
index 0000000..de05d20
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm
@@ -0,0 +1,86 @@
+ TITLE RShiftU64.asm: 64-bit right shift function for IA-32
+
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2004, Intel Corporation
+; All rights reserved. This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; RShiftU64.asm
+;
+; Abstract:
+;
+; 64-bit right shift function for IA-32
+;
+;------------------------------------------------------------------------------
+
+ .686P
+ .XMM
+ .MODEL SMALL
+ .CODE
+
+RShiftU64 PROTO C Operand: QWORD, Count: DWORD
+
+RShiftU64 PROC C Operand: QWORD, Count: DWORD
+
+;------------------------------------------------------------------------------
+; UINT64
+; RShiftU64 (
+;   IN UINT64   Operand,
+;   IN UINTN    Count
+;   )
+;
+; Routine Description:
+;
+;   Shifts the 64-bit Operand right by Count bit positions, filling the
+;   vacated high-order bits with zeros (logical shift).
+;   Only bits 0-5 of Count are used, so the effective shift is 0 - 63.
+;
+; Arguments:
+;
+;   Operand - Value to be shifted
+;   Count   - Number of bit positions to shift right
+;
+; Returns:
+;
+;   EDX:EAX = Operand >> (Count AND 63). ECX is preserved.
+;------------------------------------------------------------------------------
+
+  push   ecx
+
+  mov    ecx, Count
+  and    ecx, 63
+  mov    eax, dword ptr Operand[0]
+  mov    edx, dword ptr Operand[4]
+
+  ;
+  ; For a count of 32 - 63, do the "whole dword" part of the shift first:
+  ; the high dword becomes the low dword and the high dword is cleared.
+  ;
+  test   ecx, 32
+  jz     short _RShiftU64_Partial
+
+  mov    eax, edx
+  xor    edx, edx
+
+_RShiftU64_Partial:
+  ;
+  ; shrd/shr only use the low 5 bits of CL, so this shifts EDX:EAX by the
+  ; remaining 0 - 31 bits. shrd does not modify EDX; shr finishes the high half.
+  ;
+  shrd   eax, edx, cl
+  shr    edx, cl
+
+  pop    ecx
+  ret
+
+RShiftU64 ENDP
+
+END