summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nilay Vaish <nilay@cs.wisc.edu> 2011-11-03 22:52:21 -0500
committer: Nilay Vaish <nilay@cs.wisc.edu> 2011-11-03 22:52:21 -0500
commit: 582ea4d5431f9fa9edbeb16835b04171647ea18b (patch)
tree: a9a3ae50ff09f7791525cf8313d8afec67f9f3e3
parent: fb5c095cd53f4f16e139d9e959c41e089ff79896 (diff)
download: gem5-582ea4d5431f9fa9edbeb16835b04171647ea18b.tar.xz
x86: Add microop for fence
This patch adds a new microop that acts as a memory barrier. The microop itself does nothing, but since it is marked as a memory barrier, the O3 CPU should flush all pending loads and stores that precede the fence to the memory system.
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py 36
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py 8
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py 24
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py 12
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/logical.py 28
-rw-r--r-- src/arch/x86/isa/insts/general_purpose/semaphores.py 8
-rw-r--r-- src/arch/x86/isa/microops/specop.isa 53
7 files changed, 169 insertions, 0 deletions
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py
index 9fc3e9035..68031c76c 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py
@@ -67,18 +67,22 @@ def macroop ADD_P_I
def macroop ADD_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
add t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop ADD_LOCKED_P_I
{
rdip t7
limm t2, imm
+ mfence
ldstl t1, seg, riprel, disp
add t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop ADD_M_R
@@ -98,17 +102,21 @@ def macroop ADD_P_R
def macroop ADD_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
add t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop ADD_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
add t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop ADD_R_M
@@ -168,18 +176,22 @@ def macroop SUB_P_I
def macroop SUB_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
sub t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop SUB_LOCKED_P_I
{
rdip t7
limm t2, imm
+ mfence
ldstl t1, seg, riprel, disp
sub t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop SUB_M_R
@@ -199,17 +211,21 @@ def macroop SUB_P_R
def macroop SUB_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
sub t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop SUB_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
sub t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop ADC_R_R
@@ -243,18 +259,22 @@ def macroop ADC_P_I
def macroop ADC_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
adc t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop ADC_LOCKED_P_I
{
rdip t7
limm t2, imm
+ mfence
ldstl t1, seg, riprel, disp
adc t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop ADC_M_R
@@ -274,17 +294,21 @@ def macroop ADC_P_R
def macroop ADC_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
adc t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop ADC_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
adc t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop ADC_R_M
@@ -344,18 +368,22 @@ def macroop SBB_P_I
def macroop SBB_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
sbb t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop SBB_LOCKED_P_I
{
rdip t7
limm t2, imm
+ mfence
ldstl t1, seg, riprel, disp
sbb t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop SBB_M_R
@@ -375,17 +403,21 @@ def macroop SBB_P_R
def macroop SBB_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
sbb t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop SBB_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
sbb t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop NEG_R
@@ -410,16 +442,20 @@ def macroop NEG_P
def macroop NEG_LOCKED_M
{
+ mfence
ldstl t1, seg, sib, disp
sub t1, t0, t1, flags=(CF,OF,SF,ZF,AF,PF)
stul t1, seg, sib, disp
+ mfence
};
def macroop NEG_LOCKED_P
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
sub t1, t0, t1, flags=(CF,OF,SF,ZF,AF,PF)
stul t1, seg, riprel, disp
+ mfence
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py
index f27cd7008..515082d64 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py
@@ -58,17 +58,21 @@ def macroop INC_P
def macroop INC_LOCKED_M
{
+ mfence
ldstl t1, seg, sib, disp
addi t1, t1, 1, flags=(OF, SF, ZF, AF, PF)
stul t1, seg, sib, disp
+ mfence
};
def macroop INC_LOCKED_P
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
addi t1, t1, 1, flags=(OF, SF, ZF, AF, PF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop DEC_R
@@ -93,16 +97,20 @@ def macroop DEC_P
def macroop DEC_LOCKED_M
{
+ mfence
ldstl t1, seg, sib, disp
subi t1, t1, 1, flags=(OF, SF, ZF, AF, PF)
stul t1, seg, sib, disp
+ mfence
};
def macroop DEC_LOCKED_P
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
subi t1, t1, 1, flags=(OF, SF, ZF, AF, PF)
stul t1, seg, riprel, disp
+ mfence
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
index 66eb0f8a2..f69e1dc48 100644
--- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
+++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
@@ -114,10 +114,12 @@ def macroop BTC_LOCKED_M_I {
limm t1, imm, dataSize=asz
limm t4, 1
roli t4, t4, imm
+ mfence
ldstl t1, seg, sib, disp
sexti t0, t1, imm, flags=(CF,)
xor t1, t1, t4
stul t1, seg, sib, disp
+ mfence
};
def macroop BTC_LOCKED_P_I {
@@ -125,10 +127,12 @@ def macroop BTC_LOCKED_P_I {
limm t1, imm, dataSize=asz
limm t4, 1
roli t4, t4, imm
+ mfence
ldstl t1, seg, riprel, disp
sexti t0, t1, imm, flags=(CF,)
xor t1, t1, t4
stul t1, seg, riprel, disp
+ mfence
};
def macroop BTC_R_R {
@@ -168,10 +172,12 @@ def macroop BTC_LOCKED_M_R {
lea t3, flatseg, [dsz, t3, base], dataSize=asz
limm t4, 1
rol t4, t4, reg
+ mfence
ldstl t1, seg, [scale, index, t3], disp
sext t0, t1, reg, flags=(CF,)
xor t1, t1, t4
stul t1, seg, [scale, index, t3], disp
+ mfence
};
def macroop BTC_LOCKED_P_R {
@@ -180,10 +186,12 @@ def macroop BTC_LOCKED_P_R {
srai t3, t2, ldsz, dataSize=asz
limm t4, 1
rol t4, t4, reg
+ mfence
ldstl t1, seg, [dsz, t3, t7], disp
sext t0, t1, reg, flags=(CF,)
xor t1, t1, t4
stul t1, seg, [dsz, t3, t7], disp
+ mfence
};
def macroop BTR_R_I {
@@ -218,10 +226,12 @@ def macroop BTR_LOCKED_M_I {
limm t1, imm, dataSize=asz
limm t4, "(uint64_t(-(2ULL)))"
roli t4, t4, imm
+ mfence
ldstl t1, seg, sib, disp
sexti t0, t1, imm, flags=(CF,)
and t1, t1, t4
stul t1, seg, sib, disp
+ mfence
};
def macroop BTR_LOCKED_P_I {
@@ -229,10 +239,12 @@ def macroop BTR_LOCKED_P_I {
limm t1, imm, dataSize=asz
limm t4, "(uint64_t(-(2ULL)))"
roli t4, t4, imm
+ mfence
ldstl t1, seg, riprel, disp
sexti t0, t1, imm, flags=(CF,)
and t1, t1, t4
stul t1, seg, riprel, disp
+ mfence
};
def macroop BTR_R_R {
@@ -272,10 +284,12 @@ def macroop BTR_LOCKED_M_R {
lea t3, flatseg, [dsz, t3, base], dataSize=asz
limm t4, "(uint64_t(-(2ULL)))"
rol t4, t4, reg
+ mfence
ldstl t1, seg, [scale, index, t3], disp
sext t0, t1, reg, flags=(CF,)
and t1, t1, t4
stul t1, seg, [scale, index, t3], disp
+ mfence
};
def macroop BTR_LOCKED_P_R {
@@ -284,10 +298,12 @@ def macroop BTR_LOCKED_P_R {
srai t3, t2, ldsz, dataSize=asz
limm t4, "(uint64_t(-(2ULL)))"
rol t4, t4, reg
+ mfence
ldstl t1, seg, [dsz, t3, t7], disp
sext t0, t1, reg, flags=(CF,)
and t1, t1, t4
stul t1, seg, [dsz, t3, t7], disp
+ mfence
};
def macroop BTS_R_I {
@@ -322,10 +338,12 @@ def macroop BTS_LOCKED_M_I {
limm t1, imm, dataSize=asz
limm t4, 1
roli t4, t4, imm
+ mfence
ldstl t1, seg, sib, disp
sexti t0, t1, imm, flags=(CF,)
or t1, t1, t4
stul t1, seg, sib, disp
+ mfence
};
def macroop BTS_LOCKED_P_I {
@@ -333,10 +351,12 @@ def macroop BTS_LOCKED_P_I {
limm t1, imm, dataSize=asz
limm t4, 1
roli t4, t4, imm
+ mfence
ldstl t1, seg, riprel, disp
sexti t0, t1, imm, flags=(CF,)
or t1, t1, t4
stul t1, seg, riprel, disp
+ mfence
};
def macroop BTS_R_R {
@@ -377,10 +397,12 @@ def macroop BTS_LOCKED_M_R {
lea t3, flatseg, [dsz, t3, base], dataSize=asz
limm t4, 1
rol t4, t4, reg
+ mfence
ldstl t1, seg, [scale, index, t3], disp
sext t0, t1, reg, flags=(CF,)
or t1, t1, t4
stul t1, seg, [scale, index, t3], disp
+ mfence
};
def macroop BTS_LOCKED_P_R {
@@ -390,9 +412,11 @@ def macroop BTS_LOCKED_P_R {
lea t3, flatseg, [dsz, t3, base], dataSize=asz
limm t4, 1
rol t4, t4, reg
+ mfence
ldstl t1, seg, [1, t3, t7], disp
sext t0, t1, reg, flags=(CF,)
or t1, t1, t4
stul t1, seg, [1, t3, t7], disp
+ mfence
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py
index 6504b5ab4..1518ce5e0 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py
@@ -50,46 +50,58 @@ def macroop XCHG_R_R
def macroop XCHG_R_M
{
+ mfence
ldstl t1, seg, sib, disp
stul reg, seg, sib, disp
+ mfence
mov reg, reg, t1
};
def macroop XCHG_R_P
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
stul reg, seg, riprel, disp
+ mfence
mov reg, reg, t1
};
def macroop XCHG_M_R
{
+ mfence
ldstl t1, seg, sib, disp
stul reg, seg, sib, disp
+ mfence
mov reg, reg, t1
};
def macroop XCHG_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
stul reg, seg, riprel, disp
+ mfence
mov reg, reg, t1
};
def macroop XCHG_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
stul reg, seg, sib, disp
+ mfence
mov reg, reg, t1
};
def macroop XCHG_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
stul reg, seg, riprel, disp
+ mfence
mov reg, reg, t1
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/logical.py b/src/arch/x86/isa/insts/general_purpose/logical.py
index b8d442a02..49dea86e5 100644
--- a/src/arch/x86/isa/insts/general_purpose/logical.py
+++ b/src/arch/x86/isa/insts/general_purpose/logical.py
@@ -61,18 +61,22 @@ def macroop OR_P_I
def macroop OR_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
or t1, t1, t2, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop OR_LOCKED_P_I
{
limm t2, imm
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
or t1, t1, t2, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop OR_M_R
@@ -92,17 +96,21 @@ def macroop OR_P_R
def macroop OR_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
or t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop OR_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
or t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop OR_R_M
@@ -155,18 +163,22 @@ def macroop XOR_P_I
def macroop XOR_LOCKED_M_I
{
limm t2, imm
+ mfence
ldstl t1, seg, sib, disp
xor t1, t1, t2, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop XOR_LOCKED_P_I
{
limm t2, imm
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
xor t1, t1, t2, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop XOR_M_R
@@ -186,17 +198,21 @@ def macroop XOR_P_R
def macroop XOR_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
xor t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop XOR_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
xor t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop XOR_R_M
@@ -255,19 +271,23 @@ def macroop AND_P_I
def macroop AND_LOCKED_M_I
{
+ mfence
ldstl t2, seg, sib, disp
limm t1, imm
and t2, t2, t1, flags=(OF,SF,ZF,PF,CF)
stul t2, seg, sib, disp
+ mfence
};
def macroop AND_LOCKED_P_I
{
rdip t7
+ mfence
ldstl t2, seg, riprel, disp
limm t1, imm
and t2, t2, t1, flags=(OF,SF,ZF,PF,CF)
stul t2, seg, riprel, disp
+ mfence
};
def macroop AND_M_R
@@ -287,17 +307,21 @@ def macroop AND_P_R
def macroop AND_LOCKED_M_R
{
+ mfence
ldstl t1, seg, sib, disp
and t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, sib, disp
+ mfence
};
def macroop AND_LOCKED_P_R
{
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
and t1, t1, reg, flags=(OF,SF,ZF,PF,CF)
stul t1, seg, riprel, disp
+ mfence
};
def macroop NOT_R
@@ -326,17 +350,21 @@ def macroop NOT_P
def macroop NOT_LOCKED_M
{
limm t1, -1
+ mfence
ldstl t2, seg, sib, disp
xor t2, t2, t1
stul t2, seg, sib, disp
+ mfence
};
def macroop NOT_LOCKED_P
{
limm t1, -1
rdip t7
+ mfence
ldstl t2, seg, riprel, disp
xor t2, t2, t1
stul t2, seg, riprel, disp
+ mfence
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py
index 072e28de6..17bee7fb7 100644
--- a/src/arch/x86/isa/insts/general_purpose/semaphores.py
+++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py
@@ -62,21 +62,25 @@ def macroop CMPXCHG_P_R {
};
def macroop CMPXCHG_LOCKED_M_R {
+ mfence
ldstl t1, seg, sib, disp
sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF)
mov t1, t1, reg, flags=(CZF,)
stul t1, seg, sib, disp
+ mfence
mov rax, rax, t1, flags=(nCZF,)
};
def macroop CMPXCHG_LOCKED_P_R {
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF)
mov t1, t1, reg, flags=(CZF,)
stul t1, seg, riprel, disp
+ mfence
mov rax, rax, t1, flags=(nCZF,)
};
@@ -96,17 +100,21 @@ def macroop XADD_P_R {
};
def macroop XADD_LOCKED_M_R {
+ mfence
ldstl t1, seg, sib, disp
add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t2, seg, sib, disp
+ mfence
mov reg, reg, t1
};
def macroop XADD_LOCKED_P_R {
rdip t7
+ mfence
ldstl t1, seg, riprel, disp
add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
stul t2, seg, riprel, disp
+ mfence
mov reg, reg, t1
};
diff --git a/src/arch/x86/isa/microops/specop.isa b/src/arch/x86/isa/microops/specop.isa
index 52420f175..5c242e2c9 100644
--- a/src/arch/x86/isa/microops/specop.isa
+++ b/src/arch/x86/isa/microops/specop.isa
@@ -1,4 +1,5 @@
// Copyright (c) 2007-2008 The Hewlett-Packard Development Company
+// Copyright (c) 2011 Mark D. Hill and David A. Wood
// All rights reserved.
//
// The license below extends only to copyright in the software and shall
@@ -203,3 +204,55 @@ let {{
microopClasses["halt"] = Halt
}};
+
+def template MicroFenceOpDeclare {{
+ class %(class_name)s : public X86ISA::X86MicroopBase
+ {
+ public:
+ %(class_name)s(ExtMachInst _machInst,
+ const char * instMnem,
+ uint64_t setFlags);
+
+ %(BasicExecDeclare)s
+ };
+}};
+
+def template MicroFenceOpConstructor {{
+ inline %(class_name)s::%(class_name)s(
+ ExtMachInst machInst, const char * instMnem, uint64_t setFlags) :
+ %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+ setFlags, %(op_class)s)
+ {
+ %(constructor)s;
+ }
+}};
+
+let {{
+ class MfenceOp(X86Microop):
+ def __init__(self):
+ self.className = "Mfence"
+ self.mnemonic = "mfence"
+ self.instFlags = "| (1ULL << StaticInst::IsMemBarrier)"
+
+ def getAllocator(self, microFlags):
+ allocString = '''
+ (StaticInstPtr)(new %(class_name)s(machInst,
+ macrocodeBlock, %(flags)s))
+ '''
+ allocator = allocString % {
+ "class_name" : self.className,
+ "mnemonic" : self.mnemonic,
+ "flags" : self.microFlagsText(microFlags) + self.instFlags}
+ return allocator
+
+ microopClasses["mfence"] = MfenceOp
+}};
+
+let {{
+ # Generate the Mfence microop class (no operands, empty code) from the templates above
+ iop = InstObjParams("mfence", "Mfence", 'X86MicroopBase',
+ {"code" : ""})
+ header_output += MicroFenceOpDeclare.subst(iop)
+ decoder_output += MicroFenceOpConstructor.subst(iop)
+ exec_output += BasicExecute.subst(iop)
+}};