summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configs/ruby/MESI_Three_Level.py3
-rw-r--r--configs/ruby/MESI_Two_Level.py4
-rw-r--r--configs/ruby/MI_example.py4
-rw-r--r--configs/ruby/MOESI_CMP_directory.py4
-rw-r--r--configs/ruby/MOESI_CMP_token.py4
-rw-r--r--configs/ruby/MOESI_hammer.py4
-rw-r--r--configs/ruby/Ruby.py8
-rw-r--r--src/arch/x86/isa/decoder/two_byte_opcodes.isa16
-rw-r--r--src/arch/x86/isa/formats/formats.isa3
-rw-r--r--src/arch/x86/isa/formats/monitor_mwait.isa131
-rw-r--r--src/cpu/SConscript1
-rw-r--r--src/cpu/base.cc83
-rw-r--r--src/cpu/base.hh25
-rw-r--r--src/cpu/base_dyn_inst.hh8
-rw-r--r--src/cpu/checker/cpu.hh7
-rw-r--r--src/cpu/exec_context.hh5
-rw-r--r--src/cpu/inorder/inorder_dyn_inst.cc22
-rw-r--r--src/cpu/inorder/inorder_dyn_inst.hh7
-rw-r--r--src/cpu/minor/exec_context.hh9
-rw-r--r--src/cpu/o3/cpu.cc4
-rw-r--r--src/cpu/o3/cpu.hh4
-rw-r--r--src/cpu/simple/atomic.cc12
-rw-r--r--src/cpu/simple/base.cc2
-rw-r--r--src/cpu/simple/base.hh8
-rw-r--r--src/cpu/simple/timing.cc14
-rw-r--r--src/cpu/simple/timing.hh5
26 files changed, 381 insertions, 16 deletions
diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py
index f9ded25f1..f5a2ddfbe 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -34,6 +34,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
- send_evictions = (options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py
index b7bdd1447..d911d76ef 100644
--- a/configs/ruby/MESI_Two_Level.py
+++ b/configs/ruby/MESI_Two_Level.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
prefetcher = prefetcher,
ruby_system = ruby_system,
clk_domain=system.cpu[i].clk_domain,
diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py
index 2dd064b55..708e111e6 100644
--- a/configs/ruby/MI_example.py
+++ b/configs/ruby/MI_example.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the cache latency is only used by the sequencer on fast path hits
@@ -79,8 +80,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
#
l1_cntrl = L1Cache_Controller(version = i,
cacheMemory = cache,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py
index 9c4bab434..14ba33698 100644
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 26cd625b5..42759b092 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
not options.disable_dyn_timeouts,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 740c6783e..571a645a6 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L2cache = l2_cache,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index b99e251d3..44d6bdfcc 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []):
ruby.num_of_sequencers = len(cpu_sequencers)
ruby.random_seed = options.random_seed
+def send_evicts(options):
+    # Evictions are forwarded to the CPU in two scenarios:
+    # 1. The O3 ("detailed") model must keep the LSQ coherent with the caches
+    # 2. The x86 mwait instruction is built on top of coherence invalidations
+    # NOTE(review): this def is added between statements of create_system();
+    # confirm the backing-store setup that follows it is still reached.
+    return (options.cpu_type == "detailed" or
+            buildEnv['TARGET_ISA'] == 'x86')
+
# Create a backing copy of physical memory in case required
if options.access_backing_store:
ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index eb395fce2..081bad971 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -71,8 +71,20 @@
}
0x1: decode MODRM_MOD {
0x3: decode MODRM_RM {
- 0x0: monitor();
- 0x1: mwait();
+ 0x0: MonitorInst::monitor({{
+ xc->armMonitor(Rax);
+ }});
+ 0x1: MwaitInst::mwait({{
+ uint64_t m = 0; //mem
+ unsigned s = 0x8; //size
+ unsigned f = 0; //flags
+ readMemAtomic(xc, traceData,
+ xc->getAddrMonitor()->vAddr,
+ m, s, f);
+ xc->mwaitAtomic(xc->tcBase());
+ MicroHalt hltObj(machInst, mnemonic, 0x0);
+ hltObj.execute(xc, traceData);
+ }});
default: Inst::UD2();
}
default: sidt_Ms();
diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa
index cc0eb9acf..b5ffd4d59 100644
--- a/src/arch/x86/isa/formats/formats.isa
+++ b/src/arch/x86/isa/formats/formats.isa
@@ -45,6 +45,9 @@
//Include a format to generate a CPUID instruction.
##include "cpuid.isa"
+//Include a format to generate the monitor/mwait instructions.
+##include "monitor_mwait.isa"
+
//Include the "unknown" format
##include "unknown.isa"
diff --git a/src/arch/x86/isa/formats/monitor_mwait.isa b/src/arch/x86/isa/formats/monitor_mwait.isa
new file mode 100644
index 000000000..493b7c58a
--- /dev/null
+++ b/src/arch/x86/isa/formats/monitor_mwait.isa
@@ -0,0 +1,131 @@
+// Copyright (c) AMD
+// All rights reserved.
+//
+// Authors: Marc Orr
+
+// Monitor Instruction
+
+output header {{
+ // Base class for the x86 monitor instruction. It only stores what
+ // X86StaticInst needs and provides a disassembly routine.
+ class MonitorInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor: forwards the mnemonic, machine instruction and op class
+ /// to X86StaticInst unchanged.
+ MonitorInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ { }
+
+ /// Builds the textual disassembly of the instruction.
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+    std::string
+    MonitorInst::generateDisassembly(Addr PC,
+            const SymbolTable *symtab) const
+    {
+        // Output is the mnemonic, a space, then the first source register.
+        std::stringstream out;
+        printMnemonic(out, mnemonic);
+        ccprintf(out, " ");
+        printReg(out, _srcRegIdx[0], machInst.opSize);
+
+        return out.str();
+    }
+}};
+
+// Decoder format for monitor: builds the instruction parameters with
+// 'MonitorInst' as the base class and fills in the Basic* declare,
+// constructor, decode and execute templates.
+def format MonitorInst(code, *opt_flags) {{
+ iop = InstObjParams(name, Name, 'MonitorInst', code, opt_flags)
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+
+// Mwait instruction
+
+// Declarations for execute() methods.
+// Three entry points are declared: execute(), initiateAcc() and
+// completeAcc(). %(CPU_exec_context)s is a template substitution.
+def template MwaitExecDeclare {{
+ Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
+ Trace::InstRecord *) const;
+}};
+
+// Class declaration for a generated mwait instruction: a constructor taking
+// the machine instruction, plus the methods from MwaitExecDeclare.
+def template MwaitDeclare {{
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ // Constructor.
+ %(class_name)s(ExtMachInst machInst);
+ %(MwaitExecDeclare)s
+ };
+}};
+
+// initiateAcc(): issues the read of the monitored virtual address through
+// readMemTiming.
+def template MwaitInitiateAcc {{
+    Fault
+    %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT *xc,
+            Trace::InstRecord *traceData) const
+    {
+        uint64_t memData = 0;
+        unsigned accessSize = 0x8;
+        unsigned memFlags = 0;
+        const Addr monitored = xc->getAddrMonitor()->vAddr;
+
+        // NOTE(review): the value returned by readMemTiming is discarded and
+        // NoFault is always reported -- confirm this is intended.
+        readMemTiming(xc, traceData, monitored,
+                      memData, accessSize, memFlags);
+        return NoFault;
+    }
+}};
+
+// completeAcc(): asks the execution context whether the core should wait and,
+// if so, executes a MicroHalt.
+def template MwaitCompleteAcc {{
+    Fault
+    %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc,
+            Trace::InstRecord *traceData) const
+    {
+        MicroHalt haltOp(machInst, mnemonic, 0x0);
+
+        // mwait() returns false when the halt must be skipped.
+        const bool mustHalt = xc->mwait(pkt);
+        if (mustHalt)
+            haltOp.execute(xc, traceData);
+
+        return NoFault;
+    }
+}};
+
+output header {{
+ // Base class for the x86 mwait instruction. Unlike MonitorInst it marks
+ // itself as a memory-referencing load.
+ class MwaitInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor: forwards its arguments to X86StaticInst, then sets the
+ /// IsMemRef and IsLoad flags.
+ MwaitInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ {
+ flags[IsMemRef] = 1;
+ flags[IsLoad] = 1;
+ }
+
+ /// Builds the textual disassembly of the instruction.
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+    std::string
+    MwaitInst::generateDisassembly(Addr PC,
+            const SymbolTable *symtab) const
+    {
+        // Output is the mnemonic, a space, then the first source register.
+        std::stringstream out;
+        printMnemonic(out, mnemonic);
+        ccprintf(out, " ");
+        printReg(out, _srcRegIdx[0], machInst.opSize);
+
+        return out.str();
+    }
+}};
+
+// Decoder format for mwait: uses 'MwaitInst' as the base class, declares the
+// class with MwaitDeclare, and appends the initiateAcc/completeAcc bodies
+// to the BasicExecute output.
+def format MwaitInst(code, *opt_flags) {{
+ iop = InstObjParams(name, Name, 'MwaitInst', code, opt_flags)
+ header_output = MwaitDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+ exec_output += MwaitInitiateAcc.subst(iop)
+ exec_output += MwaitCompleteAcc.subst(iop)
+}};
+
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index df29f6c73..570f5e2f1 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -102,6 +102,7 @@ DebugFlag('IntrControl')
DebugFlag('O3PipeView')
DebugFlag('PCEvent')
DebugFlag('Quiesce')
+DebugFlag('Mwait')
CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index ea4df2aa8..2f4745ee3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -55,12 +55,14 @@
#include "base/misc.hh"
#include "base/output.hh"
#include "base/trace.hh"
-#include "cpu/base.hh"
#include "cpu/checker/cpu.hh"
+#include "cpu/base.hh"
#include "cpu/cpuevent.hh"
#include "cpu/profile.hh"
#include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
#include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
#include "params/BaseCPU.hh"
#include "sim/full_system.hh"
#include "sim/process.hh"
@@ -123,7 +125,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
interrupts(p->interrupts), profileEvent(NULL),
- numThreads(p->numThreads), system(p->system)
+ numThreads(p->numThreads), system(p->system),
+ addressMonitor()
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
@@ -261,6 +264,63 @@ BaseCPU::~BaseCPU()
}
void
+BaseCPU::armMonitor(Addr address)
+{
+    // Arming records only the virtual address; the physical line address is
+    // filled in later by mwait()/mwaitAtomic().
+    addressMonitor.vAddr = address;
+    addressMonitor.pAddr = 0x0;
+    addressMonitor.armed = true;
+
+    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+}
+
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+    // A wakeup has already been recorded: consume it and report that the
+    // core should not start waiting.
+    if (addressMonitor.gotWakeup) {
+        addressMonitor.gotWakeup = false;
+        return false;
+    }
+
+    const int line_bytes = cacheLineSize();
+    const uint64_t line_mask = ~static_cast<uint64_t>(line_bytes - 1);
+
+    assert(pkt->req->hasPaddr());
+
+    // Watch the cache line that contains the packet's address.
+    addressMonitor.pAddr = pkt->getAddr() & line_mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
+    return true;
+}
+
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+    const Addr vaddr = addressMonitor.vAddr;
+    const int line_bytes = cacheLineSize();
+    const uint64_t line_mask = ~static_cast<uint64_t>(line_bytes - 1);
+
+    // Start of the last cache line touched by a line-sized access at vaddr.
+    const Addr last_line = roundDown(vaddr + line_bytes - 1, line_bytes);
+
+    // If that access would spill into a following line, shrink the request
+    // so that it ends at the line boundary.
+    const int req_bytes = (last_line > vaddr) ?
+        static_cast<int>(last_line - vaddr) : line_bytes;
+
+    Request req;
+    req.setVirt(0, vaddr, req_bytes, 0x0, dataMasterId(), tc->instAddr());
+
+    // translate to physical address
+    Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    // Record the physical line to watch and mark the core as waiting.
+    addressMonitor.pAddr = req.getPaddr() & line_mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
+}
+
+void
BaseCPU::init()
{
if (!params()->switched_out) {
@@ -618,6 +678,25 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
comInstEventQueue[tid]->schedule(event, now + insts);
}
+AddressMonitor::AddressMonitor()
+    : armed(false), vAddr(0), pAddr(0), val(0),
+      waiting(false), gotWakeup(false)
+{
+    // Every field gets a defined value. vAddr in particular is read through
+    // getAddrMonitor() by the mwait code even when no monitor instruction
+    // armed it first, so it must not be left indeterminate.
+}
+
+bool
+AddressMonitor::doMonitor(PacketPtr pkt)
+{
+    assert(pkt->req->hasPaddr());
+
+    // Only a monitor that is armed and being waited on can be triggered.
+    if (!armed || !waiting)
+        return false;
+
+    // The packet must carry exactly the recorded physical address.
+    if (pkt->getAddr() != pAddr)
+        return false;
+
+    DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+            pkt->getAddr());
+    waiting = false;
+    return true;
+}
+
void
BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
{
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 75c8f7263..3673a5f18 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -64,11 +64,26 @@
#include "sim/insttracer.hh"
#include "sim/probe/pmu.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+class BaseCPU;
struct BaseCPUParams;
class CheckerCPU;
class ThreadContext;
+// Per-CPU state for the monitor/mwait address monitor (BaseCPU holds one).
+struct AddressMonitor
+{
+ AddressMonitor();
+ // Returns true, and clears `waiting`, when the monitor is armed and
+ // waiting and pkt's address equals pAddr; returns false otherwise.
+ bool doMonitor(PacketPtr pkt);
+
+ bool armed; // set by BaseCPU::armMonitor()
+ Addr vAddr; // virtual address passed to BaseCPU::armMonitor()
+ // cache-line-aligned physical address, set by mwait()/mwaitAtomic()
+ Addr pAddr;
+ uint64_t val; // NOTE(review): not referenced by the code in this change
+ bool waiting; // 0=normal, 1=mwaiting
+ bool gotWakeup; // set in BaseSimpleCPU::wakeup(), cleared by BaseCPU::mwait()
+};
+
class CPUProgressEvent : public Event
{
protected:
@@ -536,6 +551,16 @@ class BaseCPU : public MemObject
Stats::Scalar numCycles;
Stats::Scalar numWorkItemsStarted;
Stats::Scalar numWorkItemsCompleted;
+
+ private:
+ AddressMonitor addressMonitor;
+
+ public:
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
+ AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
+ void atomicNotify(Addr address);
};
#endif // THE_ISA == NULL_ISA
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 289627c9a..af4d238e2 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -853,6 +853,14 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Sets the number of consecutive store conditional failures. */
void setStCondFailures(unsigned int sc_failures)
{ thread->storeCondFailures = sc_failures; }
+
+ public:
+ // monitor/mwait functions
+ void armMonitor(Addr address) { cpu->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+    /** Forwards to the CPU's mwaitAtomic(), supplying cpu->dtb as the TLB. */
+    void
+    mwaitAtomic(ThreadContext *tc)
+    {
+        cpu->mwaitAtomic(tc, cpu->dtb);
+    }
+ AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
};
template<class Impl>
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index d684b142b..49f44ff00 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -349,6 +349,13 @@ class CheckerCPU : public BaseCPU, public ExecContext
this->dtb->demapPage(vaddr, asn);
}
+ // monitor/mwait functions
+ virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    /** Forwards to BaseCPU::mwaitAtomic(), supplying thread->dtb as the TLB. */
+    void
+    mwaitAtomic(ThreadContext *tc)
+    {
+        BaseCPU::mwaitAtomic(tc, thread->dtb);
+    }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+
void demapInstPage(Addr vaddr, uint64_t asn)
{
this->itb->demapPage(vaddr, asn);
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index c85a746ac..c65841db2 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -47,6 +47,7 @@
#include "arch/registers.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
+#include "cpu/base.hh"
#include "cpu/static_inst_fwd.hh"
#include "cpu/translation.hh"
@@ -243,6 +244,10 @@ class ExecContext {
* Invalidate a page in the DTLB <i>and</i> ITLB.
*/
virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
+ virtual void armMonitor(Addr address) = 0;
+ virtual bool mwait(PacketPtr pkt) = 0;
+ virtual void mwaitAtomic(ThreadContext *tc) = 0;
+ virtual AddressMonitor *getAddrMonitor() = 0;
/** @} */
diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc
index 18281e636..c64cf9da4 100644
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@@ -602,3 +602,25 @@ InOrderDynInst::dump(std::string &outstring)
outstring = s.str();
}
+
+void
+InOrderDynInst::armMonitor(Addr address)
+{
+    // Forward the request to the CPU object.
+    cpu->armMonitor(address);
+}
+
+bool
+InOrderDynInst::mwait(PacketPtr pkt)
+{
+    // The CPU object decides whether the core should start waiting.
+    const bool should_wait = cpu->mwait(pkt);
+    return should_wait;
+}
+
+void
+InOrderDynInst::mwaitAtomic(ThreadContext *tc)
+{
+    // Forward to the CPU object, using the TLB returned by getDTBPtr().
+    cpu->mwaitAtomic(tc, cpu->getDTBPtr());
+}
+
+AddressMonitor *
+InOrderDynInst::getAddrMonitor()
+{
+    // The monitor is obtained from the CPU object.
+    AddressMonitor *monitor = cpu->getCpuAddrMonitor();
+    return monitor;
+}
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
index 369ebe2f4..ebb7bf912 100644
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -1077,6 +1077,13 @@ class InOrderDynInst : public ExecContext, public RefCounted
void demapPage(Addr vaddr, uint64_t asn) {
panic("demapPage unimplemented");
}
+
+ public:
+ // monitor/mwait functions
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc);
+ AddressMonitor *getAddrMonitor();
};
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index f1143498e..41345d3bd 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -340,6 +340,15 @@ class ExecContext : public ::ExecContext
- TheISA::Misc_Reg_Base, val);
}
}
+
+ public:
+ // monitor/mwait functions
+ void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+    /** Forwards to the CPU's mwaitAtomic(), supplying thread.dtb as the TLB. */
+    void
+    mwaitAtomic(ThreadContext *tc)
+    {
+        getCpuPtr()->mwaitAtomic(tc, thread.dtb);
+    }
+ AddressMonitor *getAddrMonitor()
+ { return getCpuPtr()->getCpuAddrMonitor(); }
};
}
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index fd51cd123..55ef04ffc 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -117,6 +117,10 @@ template <class Impl>
void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
lsq->recvTimingSnoopReq(pkt);
}
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 96cd071e4..09b7db867 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -162,11 +162,13 @@ class FullO3CPU : public BaseO3CPU
/** Pointer to LSQ. */
LSQ<Impl> *lsq;
+ FullO3CPU<Impl> *cpu;
public:
/** Default constructor. */
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
- : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq)
+ : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
+ cpu(_cpu)
{ }
protected:
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index d6dbb9292..e98da3ea7 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -272,6 +272,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
@@ -288,6 +294,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 60ab53999..636e08899 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -347,6 +347,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
void
BaseSimpleCPU::wakeup()
{
+ getAddrMonitor()->gotWakeup = true;
+
if (thread->status() != ThreadContext::Suspended)
return;
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 8f38a33c8..523bc9776 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -462,6 +462,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
private:
TheISA::PCState pred_pc;
+
+ public:
+ // monitor/mwait functions
+ void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    /** Forwards to BaseCPU::mwaitAtomic(), supplying thread->dtb as the TLB. */
+    void
+    mwaitAtomic(ThreadContext *tc)
+    {
+        BaseCPU::mwaitAtomic(tc, thread->dtb);
+    }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
};
#endif // __CPU_SIMPLE_BASE_HH__
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 84a2c09fd..5bfc9799d 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -58,6 +58,8 @@
#include "sim/full_system.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+
using namespace std;
using namespace TheISA;
@@ -818,9 +820,21 @@ TimingSimpleCPU::updateCycleCounts()
void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
}
+void
+TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
+{
+    // X86 ISA: Snooping an invalidation for monitor/mwait
+    const bool monitor_hit = cpu->getAddrMonitor()->doMonitor(pkt);
+    if (monitor_hit)
+        cpu->wakeup();
+}
bool
TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 84c8f7418..52eb6b1ba 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -228,11 +228,16 @@ class TimingSimpleCPU : public BaseSimpleCPU
* a wakeup event on a cpu that is monitoring an address
*/
virtual void recvTimingSnoopReq(PacketPtr pkt);
+ virtual void recvFunctionalSnoop(PacketPtr pkt);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void recvRetry();
+        /** This port always reports itself as snooping. */
+        virtual bool isSnooping() const { return true; }
+
struct DTickEvent : public TickEvent
{
DTickEvent(TimingSimpleCPU *_cpu)