diff options
Diffstat (limited to 'EDK/Foundation/Library/EfiCommonLib/Ia32')
13 files changed, 1500 insertions, 0 deletions
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm new file mode 100644 index 0000000..5fc399f --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/DivU64x32.asm @@ -0,0 +1,99 @@ + TITLE DivU64x32.asm: 64-bit division function for IA-32 + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; DivU64x32.asm +; +; Abstract: +; +; 64-bit division function for IA-32 +; +;------------------------------------------------------------------------------ + + .686P + .XMM + .MODEL SMALL + .CODE + +DivU64x32 PROTO C Dividend: QWORD, Divisor: DWORD, Remainder: DWORD + +DivU64x32 PROC C Dividend: QWORD, Divisor: DWORD, Remainder: DWORD + +;------------------------------------------------------------------------------ +; UINT64 +; DivU64x32 ( +; IN UINT64 Dividend, +; IN UINTN Divisor, +; OUT UINTN *Remainder OPTIONAL +; ) +; +; Routine Description: +; +; This routine divides a 64 bit value by a 32 bit value using bit-by-bit +; restoring division and returns the 64bit quotient and the Remainder. +; +; Arguments: +; +; Dividend - dividend +; Divisor - divisor (not checked for zero) +; Remainder - buffer for remainder, may be NULL +; +; Returns: +; +; Dividend / Divisor in edx:eax +; *Remainder = Dividend mod Divisor when Remainder is not NULL +; +; N.B. works for any non-zero 32bit divisor; ecx is preserved. 
+;------------------------------------------------------------------------------ + + push ecx + ; + ; let edx contain the intermediate result of remainder + ; + xor edx, edx + mov ecx, 64 + +_DivU64x32_Wend: + shl dword ptr Dividend[0], 1 + rcl dword ptr Dividend[4], 1 + rcl edx, 1 + jc _DivU64x32_Sub ; carry out of edx: remainder is 33 bits wide, so it is >= Divisor + + ; If intermediate result of remainder is larger than + ; or equal to divisor, then set the lowest bit of dividend, + ; and subtract divisor from intermediate remainder (mod 2^32) + cmp edx, Divisor + jb _DivU64x32_Cont +_DivU64x32_Sub: + bts dword ptr Dividend[0], 0 + sub edx, Divisor + +_DivU64x32_Cont: + loop _DivU64x32_Wend + + cmp Remainder, 0 + je _DivU64x32_Done + mov eax, Remainder + mov dword ptr [eax], edx + +_DivU64x32_Done: + mov eax, dword ptr Dividend[0] + mov edx, dword ptr Dividend[4] + pop ecx + ret + +DivU64x32 ENDP + + +END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm new file mode 100644 index 0000000..504c08a --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMem.asm @@ -0,0 +1,183 @@ + TITLE EfiCopyMem.asm: Optimized memory-copy routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +; Module Name: +; +; EfiCopyMem.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized CopyMem service +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .MMX + .MODEL SMALL + .CODE + +EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD + +;------------------------------------------------------------------------------ +; VOID +; EfiCommonLibCopyMem ( +; IN VOID *Destination, +; IN VOID *Source, +; IN UINTN Count +; ) +;------------------------------------------------------------------------------ + +EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD + LOCAL MmxSave:QWORD + + ; Copies Count bytes from Source to Destination. Put source and destination pointers in esi/edi + push esi + push edi + mov ecx, Count ; ecx = bytes still to copy + mov esi, Source + mov edi, Destination + + ; First off, make sure we have no overlap. That is to say, + ; if (Source == Destination) => do nothing + ; if (Source + Count <= Destination) => regular copy + ; if (Destination + Count <= Source) => regular copy + ; otherwise the buffers overlap: reverse copy if Source < Destination, else regular copy + mov eax, esi + add eax, ecx ; Source + Count + cmp eax, edi + jbe _StartByteCopy ; unsigned compare: these are addresses + + mov eax, edi + add eax, ecx ; Dest + Count + cmp eax, esi + jbe _StartByteCopy + + cmp esi, edi + je _CopyMemDone ; same buffer, nothing to move + jb _CopyOverlapped ; too bad -- overlaps with Source below Destination + + ; Pick up misaligned start bytes to get destination pointer 4-byte aligned +_StartByteCopy: + cmp ecx, 0 + je _CopyMemDone ; Count == 0, all done + mov edx, edi + and dl, 3 ; check lower 2 bits of address + test dl, dl + je SHORT _CopyBlocks ; already aligned? 
+ + ; Copy a byte + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + inc edi + inc esi + jmp _StartByteCopy ; back to top of loop + +_CopyBlocks: + ; Compute how many 64-byte blocks we can copy + mov eax, ecx ; get Count in eax + shr eax, 6 ; convert to 64-byte count + shl eax, 6 ; convert back to bytes + sub ecx, eax ; subtract from the original count (ecx = tail bytes) + shr eax, 6 ; and this is how many 64-byte blocks + + ; If no 64-byte blocks, then skip + cmp eax, 0 + je _CopyRemainingDWords + + ; Save mm0 in the MmxSave local; it is the only MMX register used below + movq MmxSave, mm0 + +copymmx: + + movq mm0, QWORD PTR ds:[esi] + movq QWORD PTR ds:[edi], mm0 + movq mm0, QWORD PTR ds:[esi+8] + movq QWORD PTR ds:[edi+8], mm0 + movq mm0, QWORD PTR ds:[esi+16] + movq QWORD PTR ds:[edi+16], mm0 + movq mm0, QWORD PTR ds:[esi+24] + movq QWORD PTR ds:[edi+24], mm0 + movq mm0, QWORD PTR ds:[esi+32] + movq QWORD PTR ds:[edi+32], mm0 + movq mm0, QWORD PTR ds:[esi+40] + movq QWORD PTR ds:[edi+40], mm0 + movq mm0, QWORD PTR ds:[esi+48] + movq QWORD PTR ds:[edi+48], mm0 + movq mm0, QWORD PTR ds:[esi+56] + movq QWORD PTR ds:[edi+56], mm0 + + add edi, 64 + add esi, 64 + dec eax ; one 64-byte block done + jnz copymmx + +; Restore mm0 + movq mm0, MmxSave + emms ; Exit MMX Instruction + + ; Copy as many DWORDS as possible +_CopyRemainingDWords: + cmp ecx, 4 + jb _CopyRemainingBytes + + mov eax, DWORD PTR [esi] ; get data from Source + mov DWORD PTR [edi], eax ; write dword to Destination + sub ecx, 4 ; decrement Count + add esi, 4 ; advance Source pointer + add edi, 4 ; advance Destination pointer + jmp _CopyRemainingDWords ; back to top + +_CopyRemainingBytes: + cmp ecx, 0 + je _CopyMemDone + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + inc esi + inc edi ; advance Destination pointer + jmp SHORT _CopyRemainingBytes ; back to top of loop + + ; + ; We do this block if the source and destination buffers overlap. 
To + ; handle it, copy starting at the end of the source buffer and work + ; your way back. Since this is the atypical case, this code has not + ; been optimized, and thus simply copies bytes. + ; +_CopyOverlapped: + + ; Move the source and destination pointers to the last byte of the range + add esi, ecx ; Source + Count + dec esi + add edi, ecx ; Dest + Count + dec edi + +_CopyOverlappedLoop: + cmp ecx, 0 + je _CopyMemDone + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + dec esi + dec edi + jmp _CopyOverlappedLoop ; back to top of loop + +_CopyMemDone: + pop edi + pop esi + + ret + +EfiCommonLibCopyMem ENDP + END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm new file mode 100644 index 0000000..d5000d0 --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.asm @@ -0,0 +1,169 @@ + TITLE EfiCopyMem.asm: Optimized memory-copy routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +; Module Name: +; +; EfiCopyMemSSE2.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized CopyMem service (SSE2 variant) +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .XMM + .MODEL SMALL + .CODE + +EfiCommonLibCopyMem PROTO C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD + +;------------------------------------------------------------------------------ +; VOID +; EfiCommonLibCopyMem ( +; IN VOID *Destination, +; IN VOID *Source, +; IN UINTN Count +; ) +;------------------------------------------------------------------------------ + +EfiCommonLibCopyMem PROC C Destination:PTR DWORD, Source:PTR DWORD, Count:DWORD + + ; Copies Count bytes from Source to Destination. Put source and destination pointers in esi/edi + push esi + push edi + mov ecx, Count ; ecx = bytes still to copy + mov esi, Source + mov edi, Destination + + ; First off, make sure we have no overlap. That is to say, + ; if (Source == Destination) => do nothing + ; if (Source + Count <= Destination) => regular copy + ; if (Destination + Count <= Source) => regular copy + ; otherwise the buffers overlap: reverse copy if Source < Destination, else regular copy + mov eax, esi + add eax, ecx ; Source + Count + cmp eax, edi + jbe _StartByteCopy ; addresses are unsigned: jbe, not jle + + mov eax, edi + add eax, ecx ; Dest + Count + cmp eax, esi + jbe _StartByteCopy ; unsigned, see above + + cmp esi, edi + je _CopyMemDone ; same buffer, nothing to move + jb _CopyOverlapped ; too bad -- overlaps with Source below Destination (unsigned) + + ; Pick up misaligned start bytes to get destination pointer 4-byte aligned +_StartByteCopy: + cmp ecx, 0 + je _CopyMemDone ; Count == 0, all done + mov edx, edi + and dl, 3 ; check lower 2 bits of address + test dl, dl + je SHORT _CopyBlocks ; already aligned? 
+ + ; Copy a byte + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + inc edi + inc esi + jmp _StartByteCopy ; back to top of loop + +_CopyBlocks: + ; Compute how many 64-byte blocks we can copy + mov eax, ecx ; get Count in eax + shr eax, 6 ; convert to 64-byte count + shl eax, 6 ; convert back to bytes + sub ecx, eax ; subtract from the original count (ecx = tail bytes) + shr eax, 6 ; and this is how many 64-byte blocks + + ; If no 64-byte blocks, then skip + cmp eax, 0 + je _CopyRemainingDWords + + +copyxmm: + + movdqu xmm0, OWORD PTR ds:[esi] ; movdqu: neither pointer is known to be 16-byte aligned + movdqu OWORD PTR ds:[edi], xmm0 + movdqu xmm1, OWORD PTR ds:[esi+16] + movdqu OWORD PTR ds:[edi+16], xmm1 + movdqu xmm2, OWORD PTR ds:[esi+32] + movdqu OWORD PTR ds:[edi+32], xmm2 + movdqu xmm3, OWORD PTR ds:[esi+48] + movdqu OWORD PTR ds:[edi+48], xmm3 + + add edi, 64 + add esi, 64 + dec eax ; one 64-byte block done + jnz copyxmm + + + ; Copy as many DWORDS as possible +_CopyRemainingDWords: + cmp ecx, 4 + jb _CopyRemainingBytes + + mov eax, DWORD PTR [esi] ; get data from Source + mov DWORD PTR [edi], eax ; write dword to Destination + sub ecx, 4 ; decrement Count + add esi, 4 ; advance Source pointer + add edi, 4 ; advance Destination pointer + jmp _CopyRemainingDWords ; back to top + +_CopyRemainingBytes: + cmp ecx, 0 + je _CopyMemDone + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + inc esi + inc edi ; advance Destination pointer + jmp SHORT _CopyRemainingBytes ; back to top of loop + + ; + ; We do this block if the source and destination buffers overlap. To + ; handle it, copy starting at the end of the source buffer and work + ; your way back. Since this is the atypical case, this code has not + ; been optimized, and thus simply copies bytes. 
+ ; +_CopyOverlapped: + + ; Move the source and destination pointers to the last byte of the range + add esi, ecx ; Source + Count + dec esi + add edi, ecx ; Dest + Count + dec edi + +_CopyOverlappedLoop: + cmp ecx, 0 + je _CopyMemDone + mov al, BYTE PTR [esi] ; get byte from Source + mov BYTE PTR [edi], al ; write byte to Destination + dec ecx + dec esi + dec edi + jmp _CopyOverlappedLoop ; back to top of loop + +_CopyMemDone: + pop edi + pop esi + + ret + +EfiCommonLibCopyMem ENDP + END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm new file mode 100644 index 0000000..3b08c37 --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMem.asm @@ -0,0 +1,154 @@ + TITLE EfiSetMem.asm: Optimized setmemory routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +; Module Name: +; +; EfiSetMem.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized SetMem service +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .MMX + .MODEL SMALL + .CODE + +EfiCommonLibSetMem PROTO C Buffer:PTR DWORD, Count:DWORD, Value:BYTE + +;------------------------------------------------------------------------------ +; Procedure: EfiCommonLibSetMem +; +; VOID +; EfiCommonLibSetMem ( +; IN VOID *Buffer, +; IN UINTN Count, +; IN UINT8 Value +; ) +; +; Input: VOID *Buffer - Pointer to buffer to write +; UINTN Count - Number of bytes to write +; UINT8 Value - Value to write +; +; Output: None. +; +; Saves: edi, ebx (pushed and popped); mm0 (through the MmxSave local) +; +; Modifies: eax, ecx, edx, flags +; +; Description: This function is an optimized set-memory (fill) function. +; +; Notes: For Count >= 256 the buffer is byte-filled up to an 8-byte boundary +; and then filled in 64-byte blocks of eight MMX stores. The rest, and +; every Count below 256, is written with rep stosd and rep stosb. 
+; +;------------------------------------------------------------------------------ +EfiCommonLibSetMem PROC C Buffer:PTR DWORD, Count:DWORD, Value:BYTE + LOCAL QWordValue:QWORD + LOCAL MmxSave:QWORD + + + mov edx, Count ; edx = bytes still to write + test edx, edx + je _SetMemDone ; Count == 0: return before anything is pushed + + push edi + push ebx + + mov eax, Buffer + mov bl, Value + mov edi, eax ; edi = write pointer + mov bh, bl ; bx = Value in both bytes + + cmp edx, 256 + jb _SetRemindingByte ; short fills skip the MMX path + + and al, 07h ; low 3 bits of Buffer + test al, al + je _SetBlock ; already 8-byte aligned + + mov eax, edi + shr eax, 3 + inc eax + shl eax, 3 ; eax = next 8-byte boundary above edi + sub eax, edi ; eax = bytes needed to reach it + cmp eax, edx + jnb _SetRemindingByte + + sub edx, eax + mov ecx, eax + + mov al, bl + rep stosb ; assumes DF is clear -- TODO confirm against callers + +_SetBlock: + mov eax, edx + shr eax, 6 ; eax = number of 64-byte blocks + test eax, eax + je _SetRemindingByte + + shl eax, 6 + sub edx, eax ; edx = bytes left after the block loop + shr eax, 6 + + mov WORD PTR QWordValue[0], bx ; build 8 copies of Value in memory + mov WORD PTR QWordValue[2], bx + mov WORD PTR QWordValue[4], bx + mov WORD PTR QWordValue[6], bx + + + movq MmxSave, mm0 + movq mm0, QWordValue + +@@: + movq QWORD PTR ds:[edi], mm0 + movq QWORD PTR ds:[edi+8], mm0 + movq QWORD PTR ds:[edi+16], mm0 + 
movq QWORD PTR ds:[edi+24], mm0 + movq QWORD PTR ds:[edi+32], mm0 + movq QWORD PTR ds:[edi+40], mm0 + movq QWORD PTR ds:[edi+48], mm0 + movq QWORD PTR ds:[edi+56], mm0 + add edi, 64 + dec eax ; one 64-byte block done + jnz @B + +; Restore mm0 + movq mm0, MmxSave + emms ; Exit MMX Instruction + +_SetRemindingByte: + mov ecx, edx + + mov eax, ebx + shl eax, 16 ; discards whatever was in the upper half of ebx + mov ax, bx ; eax = Value in all four bytes + shr ecx, 2 ; dword count + rep stosd + + mov ecx, edx + and ecx, 3 ; 0 - 3 trailing bytes + rep stosb + + pop ebx + pop edi + +_SetMemDone: + ret 0 + +EfiCommonLibSetMem ENDP + END +
\ No newline at end of file diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm new file mode 100644 index 0000000..5d6652d --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiSetMemSSE2.asm @@ -0,0 +1,158 @@ + TITLE EfiSetMem.asm: Optimized setmemory routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; EfiSetMemSSE2.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized SetMem service (SSE2 variant) +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .MMX + .XMM + .MODEL SMALL + .CODE + +EfiCommonLibSetMem PROTO C Buffer:PTR DWORD, Count:DWORD, Value:BYTE + +;------------------------------------------------------------------------------ +; Procedure: EfiCommonLibSetMem +; +; VOID +; EfiCommonLibSetMem ( +; IN VOID *Buffer, +; IN UINTN Count, +; IN UINT8 Value +; ) +; +; Input: VOID *Buffer - Pointer to buffer to write +; UINTN Count - Number of bytes to write +; UINT8 Value - Value to write +; +; Output: None. +; +; Saves: edi, ebx (pushed and popped); mm0 (through the MmxSave local) +; +; Modifies: eax, ecx, edx, xmm1, flags +; +; Description: This function is an optimized set-memory (fill) function. +; +; Notes: For Count >= 256 the buffer is byte-filled up to a 16-byte boundary +; and then filled in 128-byte blocks of eight aligned SSE2 stores. The +; rest, and every Count below 256, uses rep stosd and rep stosb. 
+; +;------------------------------------------------------------------------------ +EfiCommonLibSetMem PROC C Buffer:PTR DWORD, Count:DWORD, Value:BYTE + LOCAL QWordValue:QWORD + LOCAL MmxSave:QWORD + + + mov edx, Count ; edx = bytes still to write + test edx, edx + je _SetMemDone ; Count == 0: return before anything is pushed + + push edi + push ebx + + mov eax, Buffer + mov bl, Value + mov edi, eax ; edi = write pointer + mov bh, bl ; bx = Value in both bytes + + cmp edx, 256 + jb _SetRemindingByte ; short fills skip the SSE2 path + + and al, 0fh ; low 4 bits of Buffer + test al, al + je _SetBlock ; already 16-byte aligned + + mov eax, edi + shr eax, 4 + inc eax + shl eax, 4 ; eax = next 16-byte boundary above edi + sub eax, edi ; eax = bytes needed to reach it + cmp eax, edx + jnb _SetRemindingByte + + sub edx, eax + mov ecx, eax + + mov al, bl + rep stosb ; assumes DF is clear -- TODO confirm against callers + +_SetBlock: + mov eax, edx + shr eax, 7 ; eax = number of 128-byte blocks + test eax, eax + je _SetRemindingByte + + shl eax, 7 + sub edx, eax ; edx = bytes left after the block loop + shr eax, 7 + + mov WORD PTR QWordValue[0], bx ; build 8 copies of Value in memory + mov WORD PTR QWordValue[2], bx + mov WORD PTR QWordValue[4], bx + mov WORD PTR QWordValue[6], bx + + + movq MmxSave, mm0 + movq mm0, QWordValue + + movq2dq xmm1, mm0 ; low qword of xmm1 = fill pattern + pshufd xmm1, xmm1, 0 ; replicate dword 0 into all four dwords + +@@: + movdqa OWORD PTR ds:[edi], xmm1 ; movdqa is safe: edi was aligned to 16 above + movdqa OWORD PTR ds:[edi+16], xmm1 + movdqa OWORD PTR ds:[edi+32], xmm1 + movdqa OWORD PTR ds:[edi+48], xmm1 + movdqa OWORD PTR ds:[edi+64], xmm1 + movdqa OWORD PTR ds:[edi+80], xmm1 + movdqa OWORD PTR ds:[edi+96], xmm1 + movdqa OWORD PTR ds:[edi+112], xmm1 + add edi, 128 + dec eax ; one 128-byte block done + jnz @B + +; Restore mm0 + movq mm0, MmxSave + emms ; Exit MMX Instruction + +_SetRemindingByte: + mov ecx, edx + + mov eax, ebx + shl eax, 16 ; discards whatever was in the upper half of ebx + mov ax, bx ; eax = Value in all four bytes + shr ecx, 2 ; dword count + rep stosd + + mov ecx, edx + and ecx, 3 ; 0 - 3 trailing bytes + rep stosb + + pop ebx + pop edi + +_SetMemDone: + ret 0 + +EfiCommonLibSetMem ENDP + END +
\ No newline at end of file diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm new file mode 100644 index 0000000..7188e29 --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMem.asm @@ -0,0 +1,138 @@ + TITLE EfiZeroMem.asm: Optimized memory-zero routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; EfiZeroMem.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized ZeroMem service +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .MMX + .MODEL SMALL + .CODE + +EfiCommonLibZeroMem PROTO C Buffer:PTR DWORD, Count:DWORD + +;------------------------------------------------------------------------------ +; Procedure: EfiCommonLibZeroMem +; +; VOID +; EfiCommonLibZeroMem ( +; IN VOID *Buffer, +; IN UINTN Count +; ) +; +; Input: VOID *Buffer - Pointer to buffer to clear +; UINTN Count - Number of bytes to clear +; +; Output: None. +; +; Saves: edi (pushed and popped); mm0 (through the MmxSave local) +; +; Modifies: eax, ecx, edx, flags +; +; Description: This function is an optimized zero-memory function. +; +; Notes: This function clears single bytes until the pointer is 4-byte +; aligned, then clears 64-byte blocks with eight MMX stores each, and +; finishes the remainder with rep stosd and rep stosb. 
+; +;------------------------------------------------------------------------------ +EfiCommonLibZeroMem PROC C Buffer:PTR DWORD, Count:DWORD + LOCAL MmxSave:QWORD + + ; Save edi, then put the buffer pointer into it. + push edi + mov ecx, Count ; ecx = bytes still to clear + mov edi, Buffer + + ; Pick up misaligned start bytes (get pointer 4-byte aligned) +_StartByteZero: + mov eax, edi + and al, 3 ; check lower 2 bits of address + test al, al + je _ZeroBlocks ; already aligned? + cmp ecx, 0 + je _ZeroMemDone ; ran out of bytes before reaching alignment + + ; Clear the byte memory location + mov BYTE PTR [edi], 0 + inc edi + + ; Decrement our count + dec ecx + jmp _StartByteZero ; back to top of loop + +_ZeroBlocks: + + ; Compute how many 64-byte blocks we can clear + mov edx, ecx + shr ecx, 6 ; convert to 64-byte count + shl ecx, 6 ; convert back to bytes + sub edx, ecx ; subtract from the original count (edx = tail bytes) + shr ecx, 6 ; and this is how many 64-byte blocks + + ; If no 64-byte blocks, then skip + cmp ecx, 0 + je _ZeroRemaining + + ; Save mm0 + movq MmxSave, mm0 + + pxor mm0, mm0 ; Clear mm0 + +@@: + movq QWORD PTR ds:[edi], mm0 + movq QWORD PTR ds:[edi+8], mm0 + movq QWORD PTR ds:[edi+16], mm0 + movq QWORD PTR ds:[edi+24], mm0 + movq QWORD PTR ds:[edi+32], mm0 + movq QWORD PTR ds:[edi+40], mm0 + movq QWORD PTR ds:[edi+48], mm0 + movq QWORD PTR ds:[edi+56], mm0 + + add edi, 64 + dec ecx ; one 64-byte block done + jnz @B + +; Restore mm0 + movq mm0, MmxSave + emms ; Exit MMX Instruction + +_ZeroRemaining: + ; Zero out as many DWORDS as possible + mov ecx, edx + shr ecx, 2 + xor eax, eax + + rep stosd ; assumes DF is clear -- TODO confirm against callers + + ; Zero out remaining as bytes + mov ecx, edx + and ecx, 03 + + rep stosb + +_ZeroMemDone: + pop edi + + ret + +EfiCommonLibZeroMem ENDP + END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm new file mode 100644 index 0000000..a5efd4d --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/EfiZeroMemSSE2.asm @@ -0,0 +1,127 @@ + TITLE EfiZeroMem.asm: Optimized memory-zero 
routine + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; EfiZeroMemSSE2.asm +; +; Abstract: +; +; This is the code that supports IA32-optimized ZeroMem service (SSE2 variant) +; +;------------------------------------------------------------------------------ + +; PROC:PRIVATE + .686P + .XMM + .MODEL SMALL + .CODE + +EfiCommonLibZeroMem PROTO C Buffer:PTR DWORD, Count:DWORD + +;------------------------------------------------------------------------------ +; Procedure: EfiCommonLibZeroMem +; +; VOID +; EfiCommonLibZeroMem ( +; IN VOID *Buffer, +; IN UINTN Count +; ) +; +; Input: VOID *Buffer - Pointer to buffer to clear +; UINTN Count - Number of bytes to clear +; +; Output: None. +; +; Saves: edi (pushed and popped) +; +; Modifies: eax, ecx, edx, xmm1, flags +; +; Description: This function is an optimized zero-memory function. +; +; Notes: This function clears single bytes until the pointer is 4-byte +; aligned, then clears 64-byte blocks with four unaligned 16-byte SSE2 +; stores each, and finishes the remainder with rep stosd and rep stosb. +; +;------------------------------------------------------------------------------ +EfiCommonLibZeroMem PROC C Buffer:PTR DWORD, Count:DWORD + + ; Save edi, then put the buffer pointer into it. + push edi + mov ecx, Count ; ecx = bytes still to clear + mov edi, Buffer + + ; Pick up misaligned start bytes (get pointer 4-byte aligned) +_StartByteZero: + mov eax, edi + and al, 3 ; check lower 2 bits of address + test al, al + je _ZeroBlocks ; already aligned? 
+ cmp ecx, 0 + je _ZeroMemDone ; ran out of bytes before reaching alignment + + ; Clear the byte memory location + mov BYTE PTR [edi], 0 + inc edi + + ; Decrement our count + dec ecx + jmp _StartByteZero ; back to top of loop + +_ZeroBlocks: + + ; Compute how many 64-byte blocks we can clear + mov edx, ecx + shr ecx, 6 ; convert to 64-byte count + shl ecx, 6 ; convert back to bytes + sub edx, ecx ; subtract from the original count (edx = tail bytes) + shr ecx, 6 ; and this is how many 64-byte blocks + + ; If no 64-byte blocks, then skip + cmp ecx, 0 + je _ZeroRemaining + + xorps xmm1, xmm1 ; xmm1 = 0, not saved or restored here + +@@: + movdqu OWORD PTR ds:[edi], xmm1 ; movdqu: edi is only known to be 4-byte aligned + movdqu OWORD PTR ds:[edi+16], xmm1 + movdqu OWORD PTR ds:[edi+32], xmm1 + movdqu OWORD PTR ds:[edi+48], xmm1 + + add edi, 64 + dec ecx ; one 64-byte block done + jnz @B + + +_ZeroRemaining: + ; Zero out as many DWORDS as possible + mov ecx, edx + shr ecx, 2 + xor eax, eax + + rep stosd ; assumes DF is clear -- TODO confirm against callers + + ; Zero out remaining as bytes + mov ecx, edx + and ecx, 03 + + rep stosb + +_ZeroMemDone: + pop edi + + ret + +EfiCommonLibZeroMem ENDP + END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm new file mode 100644 index 0000000..9f0d962 --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/GetPowerOfTwo.asm @@ -0,0 +1,67 @@ + TITLE GetPowerOfTwo.asm: Calculates the power of two value just below input + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2005 Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +; Module Name: +; +; GetPowerOfTwo.asm +; +; Abstract: +; +; Calculates the largest integer that is both +; a power of two and less than or equal to Input +; +;------------------------------------------------------------------------------ + + .686P + .XMM + .MODEL SMALL + .CODE + +_GetPowerOfTwo PROC +;------------------------------------------------------------------------------ +; UINT32 +; _GetPowerOfTwo ( +; IN UINT32 Input +; ) +; +; Routine Description: +; +; Calculates the largest power of two that is less than or +; equal to Input, i.e. the value of its highest set bit +; +; Arguments: +; +; Input - read as two dwords: [esp + 4] low, [esp + 8] high +; +; Returns: +; +; the power of two in edx:eax, or 0 when Input is 0 +; NOTE(review): prototype above says UINT32 - confirm the real width +;------------------------------------------------------------------------------ + ; + ; Clear edx:eax, then set the one bit BSR finds (ZF set = dword was zero) + xor eax, eax + xor edx, edx + bsr ecx, dword ptr [esp + 8] + jz @LowDword + bts edx, ecx + ret +@LowDword: + bsr ecx, dword ptr [esp + 4] + jz @Exit + bts eax, ecx +@Exit: + ret +_GetPowerOfTwo ENDP + +END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm new file mode 100644 index 0000000..de6d17e --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/LShiftU64.asm @@ -0,0 +1,86 @@ + TITLE LShiftU64.asm: 64-bit left shift function for IA-32 + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +; Module Name: +; +; LShiftU64.asm +; +; Abstract: +; +; 64-bit left shift function for IA-32 +; +;------------------------------------------------------------------------------ + + .686P + .XMM + .MODEL SMALL + .CODE + +LShiftU64 PROTO C Operand: QWORD, Count: DWORD + +LShiftU64 PROC C Operand: QWORD, Count: DWORD + +;------------------------------------------------------------------------------ +; UINT64 +; LShiftU64 ( +; IN UINT64 Operand, +; IN UINTN Count +; ) +; +; Routine Description: +; +; This routine shifts a 64 bit value left by Count bits and returns the +; shifted value in edx:eax. ecx is preserved. +; Only bits 0-5 of Count are used, so the effective shift is 0 - 63. +; +; Arguments: +; +; Operand - Value to be shifted +; Count - Number of bit positions to shift left. +; +; Returns: +; +; Operand shifted left by (Count mod 64) bit positions. +;------------------------------------------------------------------------------ + + push ecx + + mov ecx, Count + and ecx, 63 ; only bits 0-5 of Count are honoured + mov edx, dword ptr Operand[4] + mov eax, dword ptr Operand[0] + + ; + ; Shifts of 32 and more are handled on their own, because shld and shl + ; only look at CL mod 32 + ; + test cl, 32 + jnz short _LShiftU64_Wide + + shld edx, eax, cl ; 0 - 31: shift EDX:EAX as one 64-bit pair + shl eax, cl + jmp short _LShiftU64_Done + +_LShiftU64_Wide: + ; 32 - 63: the low dword moves into the high dword, shifted by Count - 32 + mov edx, eax + xor eax, eax ; low dword of the result is always zero here + shl edx, cl ; the CPU masks CL to 5 bits, i.e. Count - 32 + +_LShiftU64_Done: + pop ecx + ret + +LShiftU64 ENDP + +END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm new file mode 100644 index 0000000..167086e --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/Log2.asm @@ -0,0 +1,87 @@ + TITLE Log2.asm: 64-bit integer logarithm function for IA-32 + +;------------------------------------------------------------------------------ +; +; Copyright (c) 2004, Intel Corporation +; All rights reserved. 
This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; Log2.asm +; +; Abstract: +; +; 64-bit integer logarithm function for IA-32 +; +;------------------------------------------------------------------------------ + + .686P + .XMM + .MODEL SMALL + .CODE + +Log2 PROTO C Operand: QWORD + +Log2 PROC C Operand: QWORD + +;------------------------------------------------------------------------------ +; UINT8 +; Log2 ( +; IN UINT64 Operand +; ) +; +; Routine Description: +; +; Calculates and floors logarithms based on 2 +; +; Arguments: +; +; Operand - value to calculate logarithm +; +; Returns: +; +; floor(log2(Operand)) in al (0 - 63), i.e. the index of the highest +; set bit, or 0FFH when Operand is 0 +;------------------------------------------------------------------------------ + + ; + ; BSR yields the index of the most significant set bit and sets ZF + ; when its source operand is zero (the destination is then undefined). + ; Only eax is written, so ecx no longer has to be saved and restored. 
+ ; + + ; + ; A set bit in the high dword decides the result: 32 + its index + ; + bsr eax, dword ptr Operand[4] + jz short _Log2_Low + add eax, 32 + jmp short _Log2_Done + + ; + ; High dword is zero: the answer is the index within the low dword + ; +_Log2_Low: + bsr eax, dword ptr Operand[0] + jnz short _Log2_Done + + ; + ; Operand == 0 has no logarithm; report 0FFH in al as before + ; + mov al, 0FFH + +_Log2_Done: + ; + ; al = result (eax is zero-extended for every non-zero Operand) + ; + ret + +Log2 ENDP + +END diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm new file mode 100644 index 0000000..351020e --- /dev/null +++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/MultU64x32.asm @@ -0,0 +1,74 @@ + TITLE MultU64x32.asm: 64-bit Multiplication function for IA-32 + 
;------------------------------------------------------------------------------
;
; Copyright (c) 2004, Intel Corporation
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   MultU64x32.asm
;
; Abstract:
;
;   64-bit Multiplication function for IA-32
;
;------------------------------------------------------------------------------

  .686P
  .XMM
  .MODEL SMALL, C
  .CODE

MultU64x32 PROTO C Multiplicand: QWORD, Multiplier: DWORD

MultU64x32 PROC C Multiplicand: QWORD, Multiplier: DWORD

;------------------------------------------------------------------------------
; UINT64
; MultU64x32 (
;   IN UINT64   Multiplicand,
;   IN UINTN    Multiplier
;   )
;
; Routine Description:
;
;   Multiplies a 64-bit value by a 32-bit value and returns the low 64 bits
;   of the product.  No check is made for the product exceeding 64 bits;
;   any overflow is silently discarded.
;
; Arguments:
;
;   Multiplicand - multiplicand
;   Multiplier   - multiplier
;
; Returns:
;
;   EDX:EAX = low 64 bits of Multiplicand * Multiplier
;
; Registers:
;
;   Only EAX and EDX are modified.
;------------------------------------------------------------------------------

  ;
  ; Low dword * Multiplier -> EDX:EAX.  Park both halves on the stack while
  ; the high dword is multiplied.
  ;
  mov     eax, dword ptr Multiplicand[0]
  mul     Multiplier
  push    eax
  push    edx

  ;
  ; High dword * Multiplier.  Only EAX (the low 32 bits of this partial
  ; product) lands inside the 64-bit result.
  ;
  mov     eax, dword ptr Multiplicand[4]
  mul     Multiplier

  ;
  ; The value in EDX stored by the second multiplication overflows
  ; the output and is discarded: it is overwritten here with the EDX
  ; value of the first multiplication, to which the low half of the
  ; second product is then added.
  ;
  pop     edx
  add     edx, eax
  pop     eax
  ret

MultU64x32 ENDP

END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm
new file mode 100644
index 0000000..0346e03
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/Power10U64.asm
@@ -0,0 +1,72 @@
  TITLE   Power10U64.asm: calculates Operand * 10 ^ Power

;------------------------------------------------------------------------------
;
; Copyright (c) 2004, Intel Corporation
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   Power10U64.asm
;
; Abstract:
;
;   Calculates Operand * 10 ^ Power
;
;------------------------------------------------------------------------------

  .686P
  .XMM
  .MODEL SMALL
  .CODE

Power10U64 PROTO C Operand: QWORD, Power: DWORD
MultU64x32 PROTO C Multiplicand: QWORD, Multiplier: DWORD

Power10U64 PROC C Operand: QWORD, Power: DWORD

;------------------------------------------------------------------------------
; UINT64
; Power10U64 (
;   IN UINT64   Operand,
;   IN UINTN    Power
;   )
;
; Routine Description:
;
;   Raises 10 to the power of Power and multiplies the result with Operand,
;   by multiplying Operand by 10 Power times via MultU64x32.
;
; Arguments:
;
;   Operand - multiplicand
;   Power   - power
;
; Returns:
;
;   EDX:EAX = Operand * 10 ^ Power.  When Power is 0 this is Operand itself.
;
; Registers:
;
;   ECX is saved and restored; EAX and EDX carry the result.
;------------------------------------------------------------------------------

  push    ecx

  ;
  ; Load the result registers up front so that Power == 0 returns Operand
  ; rather than whatever happened to be in EDX:EAX on entry.
  ;
  mov     eax, dword ptr Operand[0]
  mov     edx, dword ptr Operand[4]

  ;
  ; Test all 32 bits of Power (JCXZ would only look at CX in 32-bit code).
  ;
  cmp     Power, 0
  je      short _Power10U64_Done

_Power10U64_Wend:
  invoke  MultU64x32, Operand, 10
  mov     dword ptr Operand[0], eax
  mov     dword ptr Operand[4], edx

  ;
  ; The iteration count is kept in the Power argument slot, not in ECX,
  ; so the loop does not depend on the callee preserving ECX.
  ;
  dec     Power
  jnz     short _Power10U64_Wend

_Power10U64_Done:
  pop     ecx
  ret

Power10U64 ENDP

END
diff --git a/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm b/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm
new file mode 100644
index 0000000..de05d20
--- /dev/null
+++ b/EDK/Foundation/Library/EfiCommonLib/Ia32/RShiftU64.asm
@@ -0,0 +1,86 @@
  TITLE   RShiftU64.asm: 64-bit right shift function for IA-32

;------------------------------------------------------------------------------
;
; Copyright (c) 2004, Intel Corporation
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   RShiftU64.asm
;
; Abstract:
;
;   64-bit right shift function for IA-32
;
;------------------------------------------------------------------------------

  .686P
  .XMM
  .MODEL SMALL
  .CODE

RShiftU64 PROTO C Operand: QWORD, Count: DWORD

RShiftU64 PROC C Operand: QWORD, Count: DWORD

;------------------------------------------------------------------------------
; UINT64
; RShiftU64 (
;   IN UINT64   Operand,
;   IN UINTN    Count
;   )
;
; Routine Description:
;
;   Shifts a 64-bit value right (logically, zero-filling) by Count bits and
;   returns the shifted value.
;   Only bits 0-5 of Count are used, so the effective shift is 0 - 63.
;
; Arguments:
;
;   Operand - Value to be shifted
;   Count   - Number of bit positions to shift right
;
; Returns:
;
;   EDX:EAX = Operand >> (Count AND 63)
;
; Registers:
;
;   ECX is saved and restored; EAX and EDX carry the result.
;------------------------------------------------------------------------------

  push    ecx

  mov     eax, dword ptr Operand[0]
  mov     edx, dword ptr Operand[4]

  ;
  ; CL is valid from 0 - 31. shrd will shift EDX:EAX right by CL into EAX
  ; but EDX is not touched, so EDX is shifted separately with shr.
  ; For CL of 32 - 63, it will be shifted 0 - 31 so we will move edx to eax later.
  ;
  mov     ecx, Count
  and     ecx, 63
  shrd    eax, edx, cl
  shr     edx, cl

  cmp     ecx, 32
  jc      short _RShiftU64_Done

  ;
  ; Since Count is 32 - 63, edx will have been shifted by 0 - 31.
  ; If shifted by 32 or more, the shifted upper dword becomes the lower
  ; dword and the upper 32 bits are set to zero.
  ;
  mov     eax, edx
  xor     edx, edx

_RShiftU64_Done:
  pop     ecx
  ret

RShiftU64 ENDP

END