summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Orr <morr@cs.wisc.edu>2014-11-06 05:42:22 -0600
committerMarc Orr <morr@cs.wisc.edu>2014-11-06 05:42:22 -0600
commitbf80734b2ce080cd75f4b57be47e37465e8901f1 (patch)
tree0ba9cbba64cd017e95b5a3f637d58435208ebc3c
parent3947f88d0fa35d2134fa3e999e05bb184a01e396 (diff)
downloadgem5-bf80734b2ce080cd75f4b57be47e37465e8901f1.tar.xz
x86 isa: This patch attempts an implementation at mwait.
Mwait works as follows: 1. A cpu monitors an address of interest (monitor instruction) 2. A cpu calls mwait - this loads the cache line into that cpu's cache. 3. The cpu goes to sleep. 4. When another processor requests write permission for the line, it is evicted from the sleeping cpu's cache. This eviction is forwarded to the sleeping cpu, which then wakes up. Committed by: Nilay Vaish <nilay@cs.wisc.edu>
-rw-r--r--configs/ruby/MESI_Three_Level.py3
-rw-r--r--configs/ruby/MESI_Two_Level.py4
-rw-r--r--configs/ruby/MI_example.py4
-rw-r--r--configs/ruby/MOESI_CMP_directory.py4
-rw-r--r--configs/ruby/MOESI_CMP_token.py4
-rw-r--r--configs/ruby/MOESI_hammer.py4
-rw-r--r--configs/ruby/Ruby.py8
-rw-r--r--src/arch/x86/isa/decoder/two_byte_opcodes.isa16
-rw-r--r--src/arch/x86/isa/formats/formats.isa3
-rw-r--r--src/arch/x86/isa/formats/monitor_mwait.isa131
-rw-r--r--src/cpu/SConscript1
-rw-r--r--src/cpu/base.cc83
-rw-r--r--src/cpu/base.hh25
-rw-r--r--src/cpu/base_dyn_inst.hh8
-rw-r--r--src/cpu/checker/cpu.hh7
-rw-r--r--src/cpu/exec_context.hh5
-rw-r--r--src/cpu/inorder/inorder_dyn_inst.cc22
-rw-r--r--src/cpu/inorder/inorder_dyn_inst.hh7
-rw-r--r--src/cpu/minor/exec_context.hh9
-rw-r--r--src/cpu/o3/cpu.cc4
-rw-r--r--src/cpu/o3/cpu.hh4
-rw-r--r--src/cpu/simple/atomic.cc12
-rw-r--r--src/cpu/simple/base.cc2
-rw-r--r--src/cpu/simple/base.hh8
-rw-r--r--src/cpu/simple/timing.cc14
-rw-r--r--src/cpu/simple/timing.hh5
26 files changed, 381 insertions, 16 deletions
diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py
index f9ded25f1..f5a2ddfbe 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -34,6 +34,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
- send_evictions = (options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py
index b7bdd1447..d911d76ef 100644
--- a/configs/ruby/MESI_Two_Level.py
+++ b/configs/ruby/MESI_Two_Level.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
prefetcher = prefetcher,
ruby_system = ruby_system,
clk_domain=system.cpu[i].clk_domain,
diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py
index 2dd064b55..708e111e6 100644
--- a/configs/ruby/MI_example.py
+++ b/configs/ruby/MI_example.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the cache latency is only used by the sequencer on fast path hits
@@ -79,8 +80,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
#
l1_cntrl = L1Cache_Controller(version = i,
cacheMemory = cache,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py
index 9c4bab434..14ba33698 100644
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 26cd625b5..42759b092 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
not options.disable_dyn_timeouts,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 740c6783e..571a645a6 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -32,6 +32,7 @@ import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
L2cache = l2_cache,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index b99e251d3..44d6bdfcc 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []):
ruby.num_of_sequencers = len(cpu_sequencers)
ruby.random_seed = options.random_seed
+def send_evicts(options):
+ # currently, 2 scenarios warrant forwarding evictions to the CPU:
+ # 1. The O3 model must keep the LSQ coherent with the caches
+ # 2. The x86 mwait instruction is built on top of coherence invalidations
+ if options.cpu_type == "detailed" or buildEnv['TARGET_ISA'] == 'x86':
+ return True
+ return False
+
# Create a backing copy of physical memory in case required
if options.access_backing_store:
ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index eb395fce2..081bad971 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -71,8 +71,20 @@
}
0x1: decode MODRM_MOD {
0x3: decode MODRM_RM {
- 0x0: monitor();
- 0x1: mwait();
+ 0x0: MonitorInst::monitor({{
+ xc->armMonitor(Rax);
+ }});
+ 0x1: MwaitInst::mwait({{
+ uint64_t m = 0; //mem
+ unsigned s = 0x8; //size
+ unsigned f = 0; //flags
+ readMemAtomic(xc, traceData,
+ xc->getAddrMonitor()->vAddr,
+ m, s, f);
+ xc->mwaitAtomic(xc->tcBase());
+ MicroHalt hltObj(machInst, mnemonic, 0x0);
+ hltObj.execute(xc, traceData);
+ }});
default: Inst::UD2();
}
default: sidt_Ms();
diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa
index cc0eb9acf..b5ffd4d59 100644
--- a/src/arch/x86/isa/formats/formats.isa
+++ b/src/arch/x86/isa/formats/formats.isa
@@ -45,6 +45,9 @@
//Include a format to generate a CPUID instruction.
##include "cpuid.isa"
+//Include a format to generate a monitor/mwait instructions.
+##include "monitor_mwait.isa"
+
//Include the "unknown" format
##include "unknown.isa"
diff --git a/src/arch/x86/isa/formats/monitor_mwait.isa b/src/arch/x86/isa/formats/monitor_mwait.isa
new file mode 100644
index 000000000..493b7c58a
--- /dev/null
+++ b/src/arch/x86/isa/formats/monitor_mwait.isa
@@ -0,0 +1,131 @@
+// Copyright (c) AMD
+// All rights reserved.
+//
+// Authors: Marc Orr
+
+// Monitor Instruction
+
+output header {{
+ class MonitorInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor
+ MonitorInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ { }
+
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string MonitorInst::generateDisassembly(Addr PC,
+ const SymbolTable *symtab) const
+ {
+ std::stringstream response;
+
+ printMnemonic(response, mnemonic);
+ ccprintf(response, " ");
+ printReg(response, _srcRegIdx[0], machInst.opSize);
+ return response.str();
+ }
+}};
+
+def format MonitorInst(code, *opt_flags) {{
+ iop = InstObjParams(name, Name, 'MonitorInst', code, opt_flags)
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+
+// Mwait instruction
+
+// Declarations for execute() methods.
+def template MwaitExecDeclare {{
+ Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
+ Trace::InstRecord *) const;
+}};
+
+def template MwaitDeclare {{
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ // Constructor.
+ %(class_name)s(ExtMachInst machInst);
+ %(MwaitExecDeclare)s
+ };
+}};
+
+def template MwaitInitiateAcc {{
+ Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc,
+ Trace::InstRecord * traceData) const
+ {
+ uint64_t m = 0; //mem
+ unsigned s = 0x8; //size
+ unsigned f = 0; //flags
+ readMemTiming(xc, traceData, xc->getAddrMonitor()->vAddr, m, s, f);
+ return NoFault;
+ }
+}};
+
+def template MwaitCompleteAcc {{
+ Fault %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc,
+ Trace::InstRecord *traceData) const
+ {
+ MicroHalt hltObj(machInst, mnemonic, 0x0);
+ if(xc->mwait(pkt)) {
+ hltObj.execute(xc, traceData);
+ }
+ return NoFault;
+ }
+}};
+
+output header {{
+ class MwaitInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor
+ MwaitInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ {
+ flags[IsMemRef] = 1;
+ flags[IsLoad] = 1;
+ }
+
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string MwaitInst::generateDisassembly(Addr PC,
+ const SymbolTable *symtab) const
+ {
+ std::stringstream response;
+
+ printMnemonic(response, mnemonic);
+ ccprintf(response, " ");
+ printReg(response, _srcRegIdx[0], machInst.opSize);
+ return response.str();
+ }
+}};
+
+def format MwaitInst(code, *opt_flags) {{
+ iop = InstObjParams(name, Name, 'MwaitInst', code, opt_flags)
+ header_output = MwaitDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+ exec_output += MwaitInitiateAcc.subst(iop)
+ exec_output += MwaitCompleteAcc.subst(iop)
+}};
+
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index df29f6c73..570f5e2f1 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -102,6 +102,7 @@ DebugFlag('IntrControl')
DebugFlag('O3PipeView')
DebugFlag('PCEvent')
DebugFlag('Quiesce')
+DebugFlag('Mwait')
CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index ea4df2aa8..2f4745ee3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -55,12 +55,14 @@
#include "base/misc.hh"
#include "base/output.hh"
#include "base/trace.hh"
-#include "cpu/base.hh"
#include "cpu/checker/cpu.hh"
+#include "cpu/base.hh"
#include "cpu/cpuevent.hh"
#include "cpu/profile.hh"
#include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
#include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
#include "params/BaseCPU.hh"
#include "sim/full_system.hh"
#include "sim/process.hh"
@@ -123,7 +125,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
interrupts(p->interrupts), profileEvent(NULL),
- numThreads(p->numThreads), system(p->system)
+ numThreads(p->numThreads), system(p->system),
+ addressMonitor()
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
@@ -261,6 +264,63 @@ BaseCPU::~BaseCPU()
}
void
+BaseCPU::armMonitor(Addr address)
+{
+ addressMonitor.armed = true;
+ addressMonitor.vAddr = address;
+ addressMonitor.pAddr = 0x0;
+ DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+}
+
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+ if(addressMonitor.gotWakeup == false) {
+ int block_size = cacheLineSize();
+ uint64_t mask = ~((uint64_t)(block_size - 1));
+
+ assert(pkt->req->hasPaddr());
+ addressMonitor.pAddr = pkt->getAddr() & mask;
+ addressMonitor.waiting = true;
+
+ DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+ addressMonitor.vAddr, addressMonitor.pAddr);
+ return true;
+ } else {
+ addressMonitor.gotWakeup = false;
+ return false;
+ }
+}
+
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+ Request req;
+ Addr addr = addressMonitor.vAddr;
+ int block_size = cacheLineSize();
+ uint64_t mask = ~((uint64_t)(block_size - 1));
+ int size = block_size;
+
+ //The address of the next line if it crosses a cache line boundary.
+ Addr secondAddr = roundDown(addr + size - 1, block_size);
+
+ if (secondAddr > addr)
+ size = secondAddr - addr;
+
+ req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
+
+ // translate to physical address
+ Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+ assert(fault == NoFault);
+
+ addressMonitor.pAddr = req.getPaddr() & mask;
+ addressMonitor.waiting = true;
+
+ DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+ addressMonitor.vAddr, addressMonitor.pAddr);
+}
+
+void
BaseCPU::init()
{
if (!params()->switched_out) {
@@ -618,6 +678,25 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
comInstEventQueue[tid]->schedule(event, now + insts);
}
+AddressMonitor::AddressMonitor() {
+ armed = false;
+ waiting = false;
+ gotWakeup = false;
+}
+
+bool AddressMonitor::doMonitor(PacketPtr pkt) {
+ assert(pkt->req->hasPaddr());
+ if(armed && waiting) {
+ if(pAddr == pkt->getAddr()) {
+ DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+ pkt->getAddr());
+ waiting = false;
+ return true;
+ }
+ }
+ return false;
+}
+
void
BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
{
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 75c8f7263..3673a5f18 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -64,11 +64,26 @@
#include "sim/insttracer.hh"
#include "sim/probe/pmu.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+class BaseCPU;
struct BaseCPUParams;
class CheckerCPU;
class ThreadContext;
+struct AddressMonitor
+{
+ AddressMonitor();
+ bool doMonitor(PacketPtr pkt);
+
+ bool armed;
+ Addr vAddr;
+ Addr pAddr;
+ uint64_t val;
+ bool waiting; // 0=normal, 1=mwaiting
+ bool gotWakeup;
+};
+
class CPUProgressEvent : public Event
{
protected:
@@ -536,6 +551,16 @@ class BaseCPU : public MemObject
Stats::Scalar numCycles;
Stats::Scalar numWorkItemsStarted;
Stats::Scalar numWorkItemsCompleted;
+
+ private:
+ AddressMonitor addressMonitor;
+
+ public:
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
+ AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
+ void atomicNotify(Addr address);
};
#endif // THE_ISA == NULL_ISA
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 289627c9a..af4d238e2 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -853,6 +853,14 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Sets the number of consecutive store conditional failures. */
void setStCondFailures(unsigned int sc_failures)
{ thread->storeCondFailures = sc_failures; }
+
+ public:
+ // monitor/mwait funtions
+ void armMonitor(Addr address) { cpu->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return cpu->mwaitAtomic(tc, cpu->dtb); }
+ AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
};
template<class Impl>
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index d684b142b..49f44ff00 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -349,6 +349,13 @@ class CheckerCPU : public BaseCPU, public ExecContext
this->dtb->demapPage(vaddr, asn);
}
+ // monitor/mwait funtions
+ virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+
void demapInstPage(Addr vaddr, uint64_t asn)
{
this->itb->demapPage(vaddr, asn);
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index c85a746ac..c65841db2 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -47,6 +47,7 @@
#include "arch/registers.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
+#include "cpu/base.hh"
#include "cpu/static_inst_fwd.hh"
#include "cpu/translation.hh"
@@ -243,6 +244,10 @@ class ExecContext {
* Invalidate a page in the DTLB <i>and</i> ITLB.
*/
virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
+ virtual void armMonitor(Addr address) = 0;
+ virtual bool mwait(PacketPtr pkt) = 0;
+ virtual void mwaitAtomic(ThreadContext *tc) = 0;
+ virtual AddressMonitor *getAddrMonitor() = 0;
/** @} */
diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc
index 18281e636..c64cf9da4 100644
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@@ -602,3 +602,25 @@ InOrderDynInst::dump(std::string &outstring)
outstring = s.str();
}
+
+void
+InOrderDynInst::armMonitor(Addr address) {
+ cpu->armMonitor(address);
+}
+
+bool
+InOrderDynInst::mwait(PacketPtr pkt) {
+ return cpu->mwait(pkt);
+}
+
+void
+InOrderDynInst::mwaitAtomic(ThreadContext *tc)
+{
+ return cpu->mwaitAtomic(tc, cpu->getDTBPtr());
+}
+
+AddressMonitor *
+InOrderDynInst::getAddrMonitor()
+{
+ return cpu->getCpuAddrMonitor();
+}
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
index 369ebe2f4..ebb7bf912 100644
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -1077,6 +1077,13 @@ class InOrderDynInst : public ExecContext, public RefCounted
void demapPage(Addr vaddr, uint64_t asn) {
panic("demapPage unimplemented");
}
+
+ public:
+ // monitor/mwait funtions
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc);
+ AddressMonitor *getAddrMonitor();
};
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index f1143498e..41345d3bd 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -340,6 +340,15 @@ class ExecContext : public ::ExecContext
- TheISA::Misc_Reg_Base, val);
}
}
+
+ public:
+ // monitor/mwait funtions
+ void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+ AddressMonitor *getAddrMonitor()
+ { return getCpuPtr()->getCpuAddrMonitor(); }
};
}
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index fd51cd123..55ef04ffc 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -117,6 +117,10 @@ template <class Impl>
void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
lsq->recvTimingSnoopReq(pkt);
}
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 96cd071e4..09b7db867 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -162,11 +162,13 @@ class FullO3CPU : public BaseO3CPU
/** Pointer to LSQ. */
LSQ<Impl> *lsq;
+ FullO3CPU<Impl> *cpu;
public:
/** Default constructor. */
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
- : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq)
+ : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
+ cpu(_cpu)
{ }
protected:
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index d6dbb9292..e98da3ea7 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -272,6 +272,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
@@ -288,6 +294,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 60ab53999..636e08899 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -347,6 +347,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
void
BaseSimpleCPU::wakeup()
{
+ getAddrMonitor()->gotWakeup = true;
+
if (thread->status() != ThreadContext::Suspended)
return;
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 8f38a33c8..523bc9776 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -462,6 +462,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
private:
TheISA::PCState pred_pc;
+
+ public:
+ // monitor/mwait funtions
+ void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
};
#endif // __CPU_SIMPLE_BASE_HH__
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 84a2c09fd..5bfc9799d 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -58,6 +58,8 @@
#include "sim/full_system.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+
using namespace std;
using namespace TheISA;
@@ -818,9 +820,21 @@ TimingSimpleCPU::updateCycleCounts()
void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
}
+void
+TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
+{
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+}
bool
TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 84c8f7418..52eb6b1ba 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -228,11 +228,16 @@ class TimingSimpleCPU : public BaseSimpleCPU
* a wakeup event on a cpu that is monitoring an address
*/
virtual void recvTimingSnoopReq(PacketPtr pkt);
+ virtual void recvFunctionalSnoop(PacketPtr pkt);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void recvRetry();
+ virtual bool isSnooping() const {
+ return true;
+ }
+
struct DTickEvent : public TickEvent
{
DTickEvent(TimingSimpleCPU *_cpu)