diff options
author | Steve Reinhardt <steve.reinhardt@amd.com> | 2015-10-06 17:26:50 -0700 |
---|---|---|
committer | Steve Reinhardt <steve.reinhardt@amd.com> | 2015-10-06 17:26:50 -0700 |
commit | a2c875c746a7b9b5dcb94fd93d94ab70286dbbb4 (patch) | |
tree | 03ac1c0befec0a164e233b655759efac0f3207c0 /src/arch/x86/isa/microops/mediaop.isa | |
parent | 57b9f53afa5660152a77b7f3b7affb39f5b0e176 (diff) | |
download | gem5-a2c875c746a7b9b5dcb94fd93d94ab70286dbbb4.tar.xz |
x86: implement rcpps and rcpss SSE insts
These are packed single-precision approximate reciprocal operations,
vector and scalar versions, respectively.
This code was basically developed by copying the code for
sqrtps and sqrtss. The mrcp micro-op was simplified relative to
msqrt since there are no double-precision versions of this operation.
Diffstat (limited to 'src/arch/x86/isa/microops/mediaop.isa')
-rw-r--r-- | src/arch/x86/isa/microops/mediaop.isa | 39 |
1 files changed, 38 insertions, 1 deletions
```diff
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
index e382151ef..e5f04109f 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -1,4 +1,6 @@
-/// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2015 Advanced Micro Devices, Inc.
+//
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -691,6 +693,41 @@ let {{
             FpDestReg_uqw = result;
         '''
 
+    # compute approximate reciprocal --- single-precision only
+    class Mrcp(MediaOp):
+        def __init__(self, dest, src, \
+                size = None, destSize = None, srcSize = None, ext = None):
+            super(Mrcp, self).__init__(dest, src,\
+                    "InstRegIndex(0)", size, destSize, srcSize, ext)
+        code = '''
+            union floatInt
+            {
+                float f;
+                uint32_t i;
+            };
+
+            assert(srcSize == 4); // ISA defines single-precision only
+            assert(srcSize == destSize);
+            const int size = 4;
+            const int sizeBits = size * 8;
+            int items = numItems(size);
+            uint64_t result = FpDestReg_uqw;
+
+            for (int i = 0; i < items; i++) {
+                int hiIndex = (i + 1) * sizeBits - 1;
+                int loIndex = (i + 0) * sizeBits;
+                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+
+                floatInt fi;
+                fi.i = argBits;
+                // This is more accuracy than HW provides, but oh well
+                fi.f = 1.0 / fi.f;
+                argBits = fi.i;
+                result = insertBits(result, hiIndex, loIndex, argBits);
+            }
+            FpDestReg_uqw = result;
+        '''
+
     class Maddf(MediaOp):
         code = '''
             union floatInt
```