X86: Implement the X86 SSE3 haddpd instruction

This patch implements the haddpd instruction. It fixes the problem in the previous version (pointed out by Gabe Black) where an incorrect result was produced if the instruction was issued with the same register as both operands, e.g. "haddpd %xmm0,%xmm0". This instruction is used by many SPEC2k benchmarks.
author: Vince Weaver <vince@csl.cornell.edu> 2009-10-30 14:19:06 -0400
committer: Vince Weaver <vince@csl.cornell.edu> 2009-10-30 14:19:06 -0400
commit: b2067840a6ad7e70495ad4dc6c74bf080e68133f (patch)
tree: 9f759489d90cf18e301f3f3c6770d9c5f52bcadf
parent: cf269025f9e51eebb56d04fea6994fd72b1be4f9 (diff)
download: gem5-b2067840a6ad7e70495ad4dc6c74bf080e68133f.tar.xz
2 files changed, 22 insertions, 2 deletions
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 27aabaccc..5512f417c 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -707,7 +707,7 @@
                     }
                     // operand size (0x66)
                     0x1: decode OPCODE_OP_BOTTOM3 {
-                        0x4: WarnUnimpl::haddpd_Vo_Wo();
+                        0x4: HADDPD(Vo,Wo);
                         0x5: WarnUnimpl::hsubpd_Vo_Wo();
                         0x6: WarnUnimpl::movd_Ed_Vd();
                         0x7: MOVDQA(Wo,Vo);
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
index 8b307d3da..adf7650b9 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
@@ -55,5 +55,25 @@
 
 microcode = '''
 # HADDPS
-# HADDPD
+
+def macroop HADDPD_XMM_XMM {
+    maddf ufp1, xmmh , xmml, size=8, ext=1
+    maddf xmmh, xmmlm, xmmhm, size=8, ext=1
+    movfp xmml, ufp1
+};
+
+def macroop HADDPD_XMM_M {
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT+8", dataSize=8
+    maddf xmml, xmmh, xmml, size=8, ext=1
+    maddf xmmh, ufp1, ufp2, size=8, ext=1
+};
+
+def macroop HADDPD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8
+    maddf xmml, xmmh, xmml, size=8, ext=1
+    maddf xmmh, ufp1, ufp2, size=8, ext=1
+};
 '''
author	Vince Weaver <vince@csl.cornell.edu>	2009-10-30 14:19:06 -0400
committer	Vince Weaver <vince@csl.cornell.edu>	2009-10-30 14:19:06 -0400
commit	b2067840a6ad7e70495ad4dc6c74bf080e68133f (patch)
tree	9f759489d90cf18e301f3f3c6770d9c5f52bcadf
parent	cf269025f9e51eebb56d04fea6994fd72b1be4f9 (diff)
download	gem5-b2067840a6ad7e70495ad4dc6c74bf080e68133f.tar.xz