summary | refs | log | tree | commit | diff
path: root/src/cpu
diff options
context:
space:
mode:
author: Mitch Hayenga <mitch.hayenga@arm.com> 2015-09-30 11:14:19 -0500
committer: Mitch Hayenga <mitch.hayenga@arm.com> 2015-09-30 11:14:19 -0500
commit: fafa83ed32933fe250d34dfca23fba348429b176 (patch)
tree: 3bf8fd636f1e879273045fefda3b5d7319a38479 /src/cpu
parent: 582a0148b441fe9f4a6f977094c5ce6bf7ab6313 (diff)
download: gem5-fafa83ed32933fe250d34dfca23fba348429b176.tar.xz
cpu: Add per-thread monitors
Adds per-thread address monitors to support FullSystem SMT.
Diffstat (limited to 'src/cpu')
-rw-r--r-- src/cpu/base.cc | 47
-rw-r--r-- src/cpu/base.hh | 15
-rw-r--r-- src/cpu/base_dyn_inst.hh | 9
-rw-r--r-- src/cpu/checker/cpu.hh | 8
-rw-r--r-- src/cpu/minor/exec_context.hh | 8
-rw-r--r-- src/cpu/minor/fetch1.cc | 3
-rw-r--r-- src/cpu/minor/lsq.cc | 3
-rw-r--r-- src/cpu/o3/cpu.cc | 7
-rw-r--r-- src/cpu/simple/atomic.cc | 49
-rw-r--r-- src/cpu/simple/atomic.hh | 3
-rw-r--r-- src/cpu/simple/base.cc | 3
-rw-r--r-- src/cpu/simple/exec_context.hh | 8
-rw-r--r-- src/cpu/simple/timing.cc | 28
-rw-r--r-- src/cpu/simple/timing.hh | 1
14 files changed, 128 insertions, 64 deletions
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 77ac5f2bb..3b0809d09 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -133,7 +133,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
numThreads(p->numThreads), system(p->system),
functionTraceStream(nullptr), currentFunctionStart(0),
currentFunctionEnd(0), functionEntryTick(0),
- addressMonitor()
+ addressMonitor(p->numThreads)
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
@@ -271,39 +271,48 @@ BaseCPU::~BaseCPU()
}
void
-BaseCPU::armMonitor(Addr address)
+BaseCPU::armMonitor(ThreadID tid, Addr address)
{
- addressMonitor.armed = true;
- addressMonitor.vAddr = address;
- addressMonitor.pAddr = 0x0;
- DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+ assert(tid < numThreads);
+ AddressMonitor &monitor = addressMonitor[tid];
+
+ monitor.armed = true;
+ monitor.vAddr = address;
+ monitor.pAddr = 0x0;
+ DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
}
bool
-BaseCPU::mwait(PacketPtr pkt)
+BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
{
- if(addressMonitor.gotWakeup == false) {
+ assert(tid < numThreads);
+ AddressMonitor &monitor = addressMonitor[tid];
+
+ if(monitor.gotWakeup == false) {
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
assert(pkt->req->hasPaddr());
- addressMonitor.pAddr = pkt->getAddr() & mask;
- addressMonitor.waiting = true;
+ monitor.pAddr = pkt->getAddr() & mask;
+ monitor.waiting = true;
- DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
- addressMonitor.vAddr, addressMonitor.pAddr);
+ DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
+ "line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
return true;
} else {
- addressMonitor.gotWakeup = false;
+ monitor.gotWakeup = false;
return false;
}
}
void
-BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
{
+ assert(tid < numThreads);
+ AddressMonitor &monitor = addressMonitor[tid];
+
Request req;
- Addr addr = addressMonitor.vAddr;
+ Addr addr = monitor.vAddr;
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
int size = block_size;
@@ -320,11 +329,11 @@ BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
assert(fault == NoFault);
- addressMonitor.pAddr = req.getPaddr() & mask;
- addressMonitor.waiting = true;
+ monitor.pAddr = req.getPaddr() & mask;
+ monitor.waiting = true;
- DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
- addressMonitor.vAddr, addressMonitor.pAddr);
+ DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+ tid, monitor.vAddr, monitor.pAddr);
}
void
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 3a10841e0..0286ac45b 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -559,14 +559,17 @@ class BaseCPU : public MemObject
Stats::Scalar numWorkItemsCompleted;
private:
- AddressMonitor addressMonitor;
+ std::vector<AddressMonitor> addressMonitor;
public:
- void armMonitor(Addr address);
- bool mwait(PacketPtr pkt);
- void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
- AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
- void atomicNotify(Addr address);
+ void armMonitor(ThreadID tid, Addr address);
+ bool mwait(ThreadID tid, PacketPtr pkt);
+ void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
+ AddressMonitor *getCpuAddrMonitor(ThreadID tid)
+ {
+ assert(tid < numThreads);
+ return &addressMonitor[tid];
+ }
};
#endif // THE_ISA == NULL_ISA
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index c2ef253a7..77117b892 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -863,11 +863,12 @@ class BaseDynInst : public ExecContext, public RefCounted
public:
// monitor/mwait funtions
- void armMonitor(Addr address) { cpu->armMonitor(address); }
- bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+ void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
+ bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
void mwaitAtomic(ThreadContext *tc)
- { return cpu->mwaitAtomic(tc, cpu->dtb); }
- AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
+ { return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
+ AddressMonitor *getAddrMonitor()
+ { return cpu->getCpuAddrMonitor(threadNumber); }
};
template<class Impl>
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index a363b6d0f..69f47894b 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -350,11 +350,11 @@ class CheckerCPU : public BaseCPU, public ExecContext
}
// monitor/mwait funtions
- virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
- bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+ virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc)
- { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
- AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+ { return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
void demapInstPage(Addr vaddr, uint64_t asn)
{
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 3e4ea5ea9..625d2b877 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -343,12 +343,12 @@ class ExecContext : public ::ExecContext
public:
// monitor/mwait funtions
- void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
- bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+ void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
+ bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc)
- { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+ { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
AddressMonitor *getAddrMonitor()
- { return getCpuPtr()->getCpuAddrMonitor(); }
+ { return getCpuPtr()->getCpuAddrMonitor(0); }
};
}
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 81fc99d37..84aaf02f5 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -135,7 +135,8 @@ Fetch1::fetchLine()
"%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
request_id, aligned_pc, pc, line_offset, request_size);
- request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+ request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
+ /* thread id */ 0);
request->request.setVirt(0 /* asid */,
aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
/* I've no idea why we need the PC, but give it */
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index 376e8a0ff..e644951f8 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1501,7 +1501,8 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
if (inst->traceData)
inst->traceData->setMem(addr, size, flags);
- request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+ int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
+ request->request.setThreadContext(cid, /* thread id */ 0);
request->request.setVirt(0 /* asid */,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 026907a94..4ab004817 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -118,9 +118,10 @@ template <class Impl>
void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
- // X86 ISA: Snooping an invalidation for monitor/mwait
- if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
- cpu->wakeup();
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
}
lsq->recvTimingSnoopReq(pkt);
}
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 6690c1da6..2d9da2587 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -86,9 +86,10 @@ AtomicSimpleCPU::init()
{
BaseSimpleCPU::init();
- ifetch_req.setThreadContext(_cpuId, 0);
- data_read_req.setThreadContext(_cpuId, 0);
- data_write_req.setThreadContext(_cpuId, 0);
+ int cid = threadContexts[0]->contextId();
+ ifetch_req.setThreadContext(cid, 0);
+ data_read_req.setThreadContext(cid, 0);
+ data_write_req.setThreadContext(cid, 0);
}
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -131,6 +132,24 @@ AtomicSimpleCPU::drain()
}
void
+AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+ DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
+
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ if (tid != sender) {
+ if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ wakeup();
+ }
+
+ TheISA::handleLockedSnoop(threadInfo[tid]->thread,
+ pkt, dcachePort.cacheBlockMask);
+ }
+ }
+}
+
+void
AtomicSimpleCPU::drainResume()
{
assert(!tickEvent.scheduled());
@@ -265,8 +284,11 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
- if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
- cpu->wakeup();
+
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
}
// if snoop invalidates, release any associated locks
@@ -289,8 +311,10 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
- if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
- cpu->wakeup();
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
}
// if snoop invalidates, release any associated locks
@@ -460,6 +484,9 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
system->getPhysMem().access(&pkt);
else
dcache_latency += dcachePort.sendAtomic(&pkt);
+
+ // Notify other threads on this CPU of write
+ threadSnoop(&pkt, curThread);
}
dcache_access = true;
assert(!pkt.isError());
@@ -516,9 +543,11 @@ AtomicSimpleCPU::tick()
// Set memroy request ids to current thread
if (numThreads > 1) {
- ifetch_req.setThreadContext(_cpuId, curThread);
- data_read_req.setThreadContext(_cpuId, curThread);
- data_write_req.setThreadContext(_cpuId, curThread);
+ ContextID cid = threadContexts[curThread]->contextId();
+
+ ifetch_req.setThreadContext(cid, curThread);
+ data_read_req.setThreadContext(cid, curThread);
+ data_write_req.setThreadContext(cid, curThread);
}
SimpleExecContext& t_info = *threadInfo[curThread];
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 76ee9f897..2bea12ab2 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -186,6 +186,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
/** Return a reference to the instruction port. */
virtual MasterPort &getInstPort() { return icachePort; }
+ /** Perform snoop for other cpu-local thread contexts. */
+ void threadSnoop(PacketPtr pkt, ThreadID sender);
+
public:
DrainState drain() M5_ATTR_OVERRIDE;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 673cadd77..6e8845bf7 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -418,9 +418,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
void
BaseSimpleCPU::wakeup()
{
- getCpuAddrMonitor()->gotWakeup = true;
-
for (ThreadID tid = 0; tid < numThreads; tid++) {
+ getCpuAddrMonitor(tid)->gotWakeup = true;
if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
DPRINTF(Quiesce,"Suspended Processor awoke\n");
threadInfo[tid]->thread->activate();
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index f474cc358..591cf8227 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -376,22 +376,22 @@ class SimpleExecContext : public ExecContext {
void armMonitor(Addr address) M5_ATTR_OVERRIDE
{
- cpu->armMonitor(address);
+ cpu->armMonitor(thread->threadId(), address);
}
bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
{
- return cpu->mwait(pkt);
+ return cpu->mwait(thread->threadId(), pkt);
}
void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
{
- cpu->mwaitAtomic(tc, thread->dtb);
+ cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
}
AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
{
- return cpu->getCpuAddrMonitor();
+ return cpu->getCpuAddrMonitor(thread->threadId());
}
#if THE_ISA == MIPS_ISA
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 487da36ea..f3241f7e5 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -302,6 +302,7 @@ TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
if (do_access) {
dcache_pkt = pkt;
handleWritePacket();
+ threadSnoop(pkt, curThread);
} else {
_status = DcacheWaitResponse;
completeDataAccess(pkt);
@@ -538,6 +539,19 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
return NoFault;
}
+void
+TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ if (tid != sender) {
+ if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ wakeup();
+ }
+ TheISA::handleLockedSnoop(threadInfo[tid]->thread, pkt,
+ dcachePort.cacheBlockMask);
+ }
+ }
+}
void
TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
@@ -849,9 +863,10 @@ TimingSimpleCPU::updateCycleCounts()
void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
- // X86 ISA: Snooping an invalidation for monitor/mwait
- if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
- cpu->wakeup();
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
}
for (auto &t_info : cpu->threadInfo) {
@@ -862,9 +877,10 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
void
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
{
- // X86 ISA: Snooping an invalidation for monitor/mwait
- if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
- cpu->wakeup();
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
}
}
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index d409ac5d2..f1cc09e42 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -132,6 +132,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
};
FetchTranslation fetchTranslation;
+ void threadSnoop(PacketPtr pkt, ThreadID sender);
void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
uint8_t *data, bool read);