summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pouya Fotouhi <Pouya.Fotouhi@amd.com> 2019-08-20 19:54:29 -0500
committer: Pouya Fotouhi <pfotouhi@ucdavis.edu> 2019-09-05 05:48:08 +0000
commit: 8d439c29ed86f55c454396fdd03d606cfa13a95a (patch)
tree: 3ce2a08873a3388f3acdb1bac86d8eb117642497
parent: 3edfacea9c30b0f1c38c3296e7e4110251a247a5 (diff)
download: gem5-8d439c29ed86f55c454396fdd03d606cfa13a95a.tar.xz
arch-x86: implement movntq/movntdq instructions
Non-temporal quadword/double-quadword move instructions. This change ignores the non-temporal hint and implements the instructions so that they send cacheable requests to memory. This has some "performance" impact (i.e. some cache pollution) in exchange for better "correctness" in behavior. Change-Id: I2052ac0970f61a54bafb7332762debcb7103202d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/20288 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Jason Lowe-Power <jason@lowepower.com> Maintainer: Jason Lowe-Power <jason@lowepower.com>
-rw-r--r-- src/arch/x86/isa/decoder/two_byte_opcodes.isa 10
-rw-r--r-- src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py 16
-rw-r--r-- src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py 12
3 files changed, 34 insertions, 4 deletions
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 7a4f9e198..a23531c13 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -978,7 +978,9 @@
0x3: PAVGW(Pq,Qq);
0x4: PMULHUW(Pq,Qq);
0x5: PMULHW(Pq,Qq);
- 0x7: WarnUnimpl::movntq_Mq_Pq();
+ //Non-temporal hint is ignored since we don't have
+ //proper support for it in the memory system.
+ 0x7: MOVNTQ(Mq,Pq);
default: UD2();
}
// repe (0xF3)
@@ -995,7 +997,11 @@
0x4: PMULHUW(Vo,Wo);
0x5: PMULHW(Vo,Wo);
0x6: CVTTPD2DQ(Vo,Wo);
- 0x7: WarnUnimpl::movntdq_Mo_Vo();
+ //MOVNTDQ should really use size o (octword), but
+ //because it is split in two, we use q (quadword).
+ //Non-temporal hint is ignored since we don't have
+ //proper support for it in the memory system.
+ 0x7: MOVNTDQ(Mq,Vq);
}
// repne (0xF2)
0x8: decode OPCODE_OP_BOTTOM3 {
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
index 792153a92..08296bd56 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
@@ -34,9 +34,23 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black
+# Pouya Fotouhi
microcode = '''
-# MOVNTDQ
+def macroop MOVNTDQ_M_XMM {
+ warn "MOVNTDQ: Ignoring non-temporal hint, modeling as cacheable!"
+ cda seg, sib, "DISPLACEMENT + 8", dataSize=8
+ stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVNTDQ_P_XMM {
+ warn "MOVNTDQ_P: Ignoring non-temporal hint, modeling as cacheable!"
+ rdip t7
+ cda seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
def macroop MASKMOVDQU_XMM_XMM {
ldfp ufp1, ds, [1, t0, rdi], dataSize=8
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
index f068a0693..ccd37f165 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
@@ -34,9 +34,19 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black
+# Pouya Fotouhi
microcode = '''
-# MOVNTQ
+def macroop MOVNTQ_M_MMX {
+ warn "MOVNTQ: Ignoring non-temporal hint, modeling as cacheable!"
+ stfp mmx, seg, sib, "DISPLACEMENT", dataSize=8
+};
+
+def macroop MOVNTQ_P_MMX {
+ warn "MOVNTQ_P: Ignoring non-temporal hint, modeling as cacheable!"
+ rdip t7
+ stfp mmx, seg, riprel, "DISPLACEMENT", dataSize=8
+};
def macroop MASKMOVQ_MMX_MMX {
ldfp ufp1, ds, [1, t0, rdi], dataSize=8