summary | refs | log | tree | commit | diff
path: root/src/arch
diff options
context:
space:
mode:
author: Steve Reinhardt <steve.reinhardt@amd.com> 2015-10-06 17:26:50 -0700
committer: Steve Reinhardt <steve.reinhardt@amd.com> 2015-10-06 17:26:50 -0700
commit: a2c875c746a7b9b5dcb94fd93d94ab70286dbbb4 (patch)
tree: 03ac1c0befec0a164e233b655759efac0f3207c0 /src/arch
parent: 57b9f53afa5660152a77b7f3b7affb39f5b0e176 (diff)
download: gem5-a2c875c746a7b9b5dcb94fd93d94ab70286dbbb4.tar.xz
x86: implement rcpps and rcpss SSE insts
These are packed single-precision approximate reciprocal operations, vector and scalar versions, respectively. This code was basically developed by copying the code for sqrtps and sqrtss. The mrcp micro-op was simplified relative to msqrt since there are no double-precision versions of this operation.
Diffstat (limited to 'src/arch')
-rw-r--r-- src/arch/x86/isa/decoder/two_byte_opcodes.isa | 4
-rw-r--r-- src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py | 39
-rw-r--r-- src/arch/x86/isa/microops/mediaop.isa | 39
3 files changed, 77 insertions, 5 deletions
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 0ba7434e8..4a21e2900 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -463,7 +463,7 @@
0x0: MOVMSKPS(Gd,VRo);
0x1: SQRTPS(Vo,Wo);
0x2: WarnUnimpl::rqsrtps_Vo_Wo();
- 0x3: WarnUnimpl::rcpps_Vo_Wo();
+ 0x3: RCPPS(Vo,Wo);
0x4: ANDPS(Vo,Wo);
0x5: ANDNPS(Vo,Wo);
0x6: ORPS(Vo,Wo);
@@ -473,7 +473,7 @@
0x4: decode OPCODE_OP_BOTTOM3 {
0x1: SQRTSS(Vd,Wd);
0x2: WarnUnimpl::rsqrtss_Vd_Wd();
- 0x3: WarnUnimpl::rcpss_Vd_Wd();
+ 0x3: RCPSS(Vd,Wd);
default: UD2();
}
// operand size (0x66)
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py
index 6e0d7fbb6..666c45ca1 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py
@@ -1,4 +1,6 @@
# Copyright (c) 2007 The Hewlett-Packard Development Company
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+#
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -34,8 +36,41 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black
+# Steve Reinhardt
microcode = '''
-# RCPPS
-# RCPSS
+def macroop RCPSS_XMM_XMM {
+ mrcp xmml, xmmlm, size=4, ext=Scalar
+};
+
+def macroop RCPSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mrcp xmml, ufp1, size=4, ext=Scalar
+};
+
+def macroop RCPSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mrcp xmml, ufp1, size=4, ext=Scalar
+};
+
+def macroop RCPPS_XMM_XMM {
+ mrcp xmml, xmmlm, size=4, ext=0
+ mrcp xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop RCPPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mrcp xmml, ufp1, size=4, ext=0
+ mrcp xmmh, ufp2, size=4, ext=0
+};
+
+def macroop RCPPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mrcp xmml, ufp1, size=4, ext=0
+ mrcp xmmh, ufp2, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
index e382151ef..e5f04109f 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -1,4 +1,6 @@
-/// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2015 Advanced Micro Devices, Inc.
+//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -691,6 +693,41 @@ let {{
FpDestReg_uqw = result;
'''
+ # compute approximate reciprocal --- single-precision only
+ class Mrcp(MediaOp):
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mrcp, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+
+ assert(srcSize == 4); // ISA defines single-precision only
+ assert(srcSize == destSize);
+ const int size = 4;
+ const int sizeBits = size * 8;
+ int items = numItems(size);
+ uint64_t result = FpDestReg_uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+
+ floatInt fi;
+ fi.i = argBits;
+ // This is more accuracy than HW provides, but oh well
+ fi.f = 1.0 / fi.f;
+ argBits = fi.i;
+ result = insertBits(result, hiIndex, loIndex, argBits);
+ }
+ FpDestReg_uqw = result;
+ '''
+
class Maddf(MediaOp):
code = '''
union floatInt